Mercurial > mplayer.hg
annotate liba52/liba52_changes.diff @ 26625:5b89b42f6d50
Only compile and use libmpeg2 AltiVec code when AltiVec is available. The
AltiVec code needs -maltivec to compile, but then AltiVec instructions
appear in other places of the code causing MPlayer to sigill.
Somehow upstream libmpeg2 manages not to sigill under what appear to be
the same circumstances. Enlightenment welcome.
author | diego |
---|---|
date | Sat, 03 May 2008 15:23:22 +0000 |
parents | 236ab58453f7 |
children | 2aadf9302854 |
rev | line source |
---|---|
19249 | 1 --- include/a52.h 2006-06-12 15:04:57.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2 +++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
3 @@ -59,4 +63,9 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
4 int a52_block (a52_state_t * state); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
5 void a52_free (a52_state_t * state); |
14990 | 6 |
7 +void* a52_resample_init(uint32_t mm_accel,int flags,int chans); | |
8 +extern int (* a52_resample) (float * _f, int16_t * s16); | |
9 + | |
10 +uint16_t crc16_block(uint8_t *data,uint32_t num_bytes); | |
11 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
12 #endif /* A52_H */ |
19249 | 13 --- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
14 +++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
15 @@ -103,18 +107,34 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
16 #define DELTA_BIT_NONE (2) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
17 #define DELTA_BIT_RESERVED (3) |
14990 | 18 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
19 +#ifdef ARCH_X86_64 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
20 +# define REG_a "rax" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
21 +# define REG_d "rdx" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
22 +# define REG_S "rsi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
23 +# define REG_D "rdi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
24 +# define REG_BP "rbp" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
25 +#else |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
26 +# define REG_a "eax" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
27 +# define REG_d "edx" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
28 +# define REG_S "esi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
29 +# define REG_D "edi" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
30 +# define REG_BP "ebp" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
31 +#endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
32 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
33 void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
34 int start, int end, int fastleak, int slowleak, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
35 expbap_t * expbap); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
36 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
37 int a52_downmix_init (int input, int flags, sample_t * level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
38 sample_t clev, sample_t slev); |
14990 | 39 +void downmix_accel_init(uint32_t mm_accel); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
40 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
41 sample_t clev, sample_t slev); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
42 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
43 +extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
44 sample_t clev, sample_t slev); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
45 -void a52_upmix (sample_t * samples, int acmod, int output); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
46 +extern void (*a52_upmix) (sample_t * samples, int acmod, int output); |
14990 | 47 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
48 void a52_imdct_init (uint32_t mm_accel); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
49 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
50 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
51 +extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
52 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias); |
19249 | 53 --- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
54 +++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
55 @@ -31,6 +35,10 @@ |
14990 | 56 |
57 #define BUFFER_SIZE 4096 | |
58 | |
59 +#ifdef ALT_BITSTREAM_READER | |
60 +int indx=0; | |
61 +#endif | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
62 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
63 void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
64 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
65 int align; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
66 @@ -38,6 +46,9 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
67 align = (long)buf & 3; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
68 state->buffer_start = (uint32_t *) (buf - align); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
69 state->bits_left = 0; |
14990 | 70 +#ifdef ALT_BITSTREAM_READER |
71 + indx=0; | |
72 +#endif | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
73 bitstream_get (state, align * 8); |
14990 | 74 } |
75 | |
19249 | 76 --- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
77 +++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
78 @@ -21,6 +25,48 @@ |
14990 | 79 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
80 */ | |
81 | |
82 +/* code from ffmpeg/libavcodec */ | |
83 +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC_ == 3 && __GNUC_MINOR__ > 0) | |
84 +# define always_inline __attribute__((always_inline)) inline | |
85 +#else | |
86 +# define always_inline inline | |
87 +#endif | |
88 + | |
89 +#if defined(__sparc__) || defined(hpux) | |
90 +/* | |
91 + * the alt bitstream reader performs unaligned memory accesses; that doesn't work | |
92 + * on sparc/hpux. For now, disable ALT_BITSTREAM_READER. | |
93 + */ | |
94 +#undef ALT_BITSTREAM_READER | |
95 +#else | |
96 +// alternative (faster) bitstream reader (reads up to 3 bytes past the end of the input) | |
97 +#define ALT_BITSTREAM_READER | |
98 + | |
23873 | 99 +/* used to avoid misaligned exceptions on some archs (alpha, ...) */ |
14990 | 100 +#if defined (ARCH_X86) || defined(ARCH_ARMV4L) |
101 +# define unaligned32(a) (*(uint32_t*)(a)) | |
102 +#else | |
103 +# ifdef __GNUC__ | |
104 +static always_inline uint32_t unaligned32(const void *v) { | |
105 + struct Unaligned { | |
106 + uint32_t i; | |
107 + } __attribute__((packed)); | |
108 + | |
109 + return ((const struct Unaligned *) v)->i; | |
110 +} | |
111 +# elif defined(__DECC) | |
112 +static inline uint32_t unaligned32(const void *v) { | |
113 + return *(const __unaligned uint32_t *) v; | |
114 +} | |
115 +# else | |
116 +static inline uint32_t unaligned32(const void *v) { | |
117 + return *(const uint32_t *) v; | |
118 +} | |
119 +# endif | |
120 +#endif //!ARCH_X86 | |
121 + | |
122 +#endif | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
123 + |
14990 | 124 /* (stolen from the kernel) */ |
125 #ifdef WORDS_BIGENDIAN | |
126 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
127 @@ -28,7 +74,7 @@ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
128 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
129 #else |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
130 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
131 -# if 0 && defined (__i386__) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
132 +# if defined (__i386__) |
14990 | 133 |
134 # define swab32(x) __i386_swab32(x) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
135 static inline const uint32_t __i386_swab32(uint32_t x) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
136 @@ -39,19 +85,34 @@ |
14990 | 137 |
138 # else | |
139 | |
140 -# define swab32(x)\ | |
141 -((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ | |
142 - (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) | |
143 - | |
144 +# define swab32(x) __generic_swab32(x) | |
145 + static always_inline const uint32_t __generic_swab32(uint32_t x) | |
146 + { | |
147 + return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | | |
148 + (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])); | |
149 + } | |
150 # endif | |
151 #endif | |
152 | |
153 +#ifdef ALT_BITSTREAM_READER | |
154 +extern int indx; | |
155 +#endif | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
156 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
157 void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
158 uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
159 int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); |
14990 | 160 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
161 static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) |
14990 | 162 { |
163 +#ifdef ALT_BITSTREAM_READER | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
164 + uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) ); |
14990 | 165 + |
166 + result<<= (indx&0x07); | |
167 + result>>= 32 - num_bits; | |
168 + indx+= num_bits; | |
169 + | |
170 + return result; | |
171 +#else | |
172 uint32_t result; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
173 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
174 if (num_bits < state->bits_left) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
175 @@ -61,10 +122,29 @@ |
14990 | 176 } |
177 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
178 return a52_bitstream_get_bh (state, num_bits); |
14990 | 179 +#endif |
180 +} | |
181 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
182 +static inline void bitstream_skip(a52_state_t * state, int num_bits) |
14990 | 183 +{ |
184 +#ifdef ALT_BITSTREAM_READER | |
185 + indx+= num_bits; | |
186 +#else | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
187 + bitstream_get(state, num_bits); |
14990 | 188 +#endif |
189 } | |
190 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
191 static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) |
14990 | 192 { |
193 +#ifdef ALT_BITSTREAM_READER | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
194 + int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) ); |
14990 | 195 + |
196 + result<<= (indx&0x07); | |
197 + result>>= 32 - num_bits; | |
198 + indx+= num_bits; | |
199 + | |
200 + return result; | |
201 +#else | |
202 int32_t result; | |
203 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
204 if (num_bits < state->bits_left) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
205 @@ -74,4 +154,5 @@ |
14990 | 206 } |
207 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
208 return a52_bitstream_get_bh_2 (state, num_bits); |
14990 | 209 +#endif |
210 } | |
19249 | 211 --- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
212 +++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200 |
19373 | 213 @@ -19,18 +23,46 @@ |
14990 | 214 * You should have received a copy of the GNU General Public License |
215 * along with this program; if not, write to the Free Software | |
216 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
217 + * | |
218 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
219 */ | |
220 | |
221 #include "config.h" | |
222 | |
223 #include <string.h> | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
224 #include <inttypes.h> |
14990 | 225 |
226 #include "a52.h" | |
227 #include "a52_internal.h" | |
228 +#include "mm_accel.h" | |
229 | |
230 #define CONVERT(acmod,output) (((output) << 3) + (acmod)) | |
231 | |
232 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
233 +void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, |
14990 | 234 + sample_t clev, sample_t slev)= NULL; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
235 +void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL; |
14990 | 236 + |
237 +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
238 + sample_t clev, sample_t slev); | |
239 +static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
240 + sample_t clev, sample_t slev); | |
241 +static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, | |
242 + sample_t clev, sample_t slev); | |
243 +static void upmix_MMX (sample_t * samples, int acmod, int output); | |
244 +static void upmix_C (sample_t * samples, int acmod, int output); | |
245 + | |
246 +void downmix_accel_init(uint32_t mm_accel) | |
247 +{ | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
248 + a52_upmix= upmix_C; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
249 + a52_downmix= downmix_C; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
250 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
251 + if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
252 + if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
253 + if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; |
14990 | 254 +#endif |
255 +} | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
256 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
257 int a52_downmix_init (int input, int flags, sample_t * level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
258 sample_t clev, sample_t slev) |
14990 | 259 { |
19373 | 260 @@ -447,7 +479,7 @@ |
14990 | 261 samples[i] = 0; |
262 } | |
263 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
264 -void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
265 +void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
266 sample_t clev, sample_t slev) |
14990 | 267 { |
268 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
19373 | 269 @@ -559,7 +591,7 @@ |
14990 | 270 break; |
271 | |
272 case CONVERT (A52_3F2R, A52_2F1R): | |
273 - mix3to2 (samples, bias); | |
274 + mix3to2 (samples, bias); //FIXME possible bug? (output doesn't seem to be used) | |
275 move2to1 (samples + 768, samples + 512, bias); | |
276 break; | |
277 | |
19373 | 278 @@ -583,12 +615,12 @@ |
14990 | 279 break; |
280 | |
281 case CONVERT (A52_3F1R, A52_3F2R): | |
282 - memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
283 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
284 break; | |
285 } | |
286 } | |
287 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
288 -void a52_upmix (sample_t * samples, int acmod, int output) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
289 +void upmix_C (sample_t * samples, int acmod, int output) |
14990 | 290 { |
291 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
292 | |
19373 | 293 @@ -653,3 +685,1137 @@ |
14990 | 294 goto mix_31to21; |
295 } | |
296 } | |
297 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
298 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 299 +static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) |
300 +{ | |
301 + asm volatile( | |
302 + "movlps %2, %%xmm7 \n\t" | |
303 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
304 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 305 + ASMALIGN(4) |
14990 | 306 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
307 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
308 + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
309 + "addps (%1, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
310 + "addps 16(%1, %%"REG_S"), %%xmm1\n\t" |
14990 | 311 + "addps %%xmm7, %%xmm0 \n\t" |
312 + "addps %%xmm7, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
313 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
314 + "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
315 + "add $32, %%"REG_S" \n\t" |
14990 | 316 + " jnz 1b \n\t" |
317 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
318 + : "%"REG_S |
14990 | 319 + ); |
320 +} | |
321 + | |
322 +static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
323 +{ | |
324 + asm volatile( | |
325 + "movlps %1, %%xmm7 \n\t" | |
326 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
327 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 328 + ASMALIGN(4) |
14990 | 329 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
330 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
331 + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
332 + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 333 + "addps %%xmm7, %%xmm1 \n\t" |
334 + "addps %%xmm1, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
335 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
336 + "add $16, %%"REG_S" \n\t" |
14990 | 337 + " jnz 1b \n\t" |
338 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
339 + : "%"REG_S |
14990 | 340 + ); |
341 +} | |
342 + | |
343 +static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
344 +{ | |
345 + asm volatile( | |
346 + "movlps %1, %%xmm7 \n\t" | |
347 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
348 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 349 + ASMALIGN(4) |
14990 | 350 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
351 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
352 + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
353 + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
354 + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 355 + "addps %%xmm7, %%xmm0 \n\t" |
356 + "addps %%xmm1, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
357 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
358 + "add $16, %%"REG_S" \n\t" |
14990 | 359 + " jnz 1b \n\t" |
360 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
361 + : "%"REG_S |
14990 | 362 + ); |
363 +} | |
364 + | |
365 +static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
366 +{ | |
367 + asm volatile( | |
368 + "movlps %1, %%xmm7 \n\t" | |
369 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
370 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 371 + ASMALIGN(4) |
14990 | 372 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
373 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
374 + "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
375 + "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
376 + "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 377 + "addps %%xmm7, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
378 + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 379 + "addps %%xmm1, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
380 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
381 + "add $16, %%"REG_S" \n\t" |
14990 | 382 + " jnz 1b \n\t" |
383 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
384 + : "%"REG_S |
14990 | 385 + ); |
386 +} | |
387 + | |
388 +static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
389 +{ | |
390 + asm volatile( | |
391 + "movlps %1, %%xmm7 \n\t" | |
392 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
393 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 394 + ASMALIGN(4) |
14990 | 395 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
396 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 397 + "addps %%xmm7, %%xmm0 \n\t" //common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
398 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
399 + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 400 + "addps %%xmm0, %%xmm1 \n\t" |
401 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
402 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
403 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
404 + "add $16, %%"REG_S" \n\t" |
14990 | 405 + " jnz 1b \n\t" |
406 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
407 + : "%"REG_S |
14990 | 408 + ); |
409 +} | |
410 + | |
411 +static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
412 +{ | |
413 + asm volatile( | |
414 + "movlps %2, %%xmm7 \n\t" | |
415 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
416 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 417 + ASMALIGN(4) |
14990 | 418 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
419 + "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" |
14990 | 420 + "addps %%xmm7, %%xmm0 \n\t" //common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
421 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
422 + "movaps (%1, %%"REG_S"), %%xmm2 \n\t" |
14990 | 423 + "addps %%xmm0, %%xmm1 \n\t" |
424 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
425 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
426 + "movaps %%xmm2, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
427 + "add $16, %%"REG_S" \n\t" |
14990 | 428 + " jnz 1b \n\t" |
429 + :: "r" (left+256), "r" (right+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
430 + : "%"REG_S |
14990 | 431 + ); |
432 +} | |
433 + | |
434 +static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
435 +{ | |
436 + asm volatile( | |
437 + "movlps %1, %%xmm7 \n\t" | |
438 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
439 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 440 + ASMALIGN(4) |
14990 | 441 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
442 + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
443 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
444 + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 445 + "addps %%xmm7, %%xmm1 \n\t" |
446 + "addps %%xmm7, %%xmm2 \n\t" | |
447 + "subps %%xmm0, %%xmm1 \n\t" | |
448 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
449 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
450 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
451 + "add $16, %%"REG_S" \n\t" |
14990 | 452 + " jnz 1b \n\t" |
453 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
454 + : "%"REG_S |
14990 | 455 + ); |
456 +} | |
457 + | |
458 +static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
459 +{ | |
460 + asm volatile( | |
461 + "movlps %1, %%xmm7 \n\t" | |
462 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
463 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 464 + ASMALIGN(4) |
14990 | 465 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
466 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
467 + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 468 + "addps %%xmm7, %%xmm0 \n\t" // common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
469 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
470 + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 471 + "addps %%xmm0, %%xmm1 \n\t" |
472 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
473 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
474 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
475 + "add $16, %%"REG_S" \n\t" |
14990 | 476 + " jnz 1b \n\t" |
477 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
478 + : "%"REG_S |
14990 | 479 + ); |
480 +} | |
481 + | |
482 +static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
483 +{ | |
484 + asm volatile( | |
485 + "movlps %1, %%xmm7 \n\t" | |
486 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
487 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 488 + ASMALIGN(4) |
14990 | 489 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
490 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
491 + "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround |
14990 | 492 + "addps %%xmm7, %%xmm0 \n\t" // common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
493 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
494 + "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 495 + "addps %%xmm0, %%xmm1 \n\t" |
496 + "addps %%xmm0, %%xmm2 \n\t" | |
497 + "subps %%xmm3, %%xmm1 \n\t" | |
498 + "addps %%xmm3, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
499 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
500 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
501 + "add $16, %%"REG_S" \n\t" |
14990 | 502 + " jnz 1b \n\t" |
503 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
504 + : "%"REG_S |
14990 | 505 + ); |
506 +} | |
507 + | |
508 +static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
509 +{ | |
510 + asm volatile( | |
511 + "movlps %1, %%xmm7 \n\t" | |
512 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
513 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 514 + ASMALIGN(4) |
14990 | 515 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
516 + "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
517 + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
518 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
519 + "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 520 + "addps %%xmm7, %%xmm1 \n\t" |
521 + "addps %%xmm7, %%xmm2 \n\t" | |
522 + "subps %%xmm0, %%xmm1 \n\t" | |
523 + "addps %%xmm0, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
524 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
525 + "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
526 + "add $16, %%"REG_S" \n\t" |
14990 | 527 + " jnz 1b \n\t" |
528 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
529 + : "%"REG_S |
14990 | 530 + ); |
531 +} | |
532 + | |
533 +static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
534 +{ | |
535 + asm volatile( | |
536 + "movlps %1, %%xmm7 \n\t" | |
537 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
538 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 539 + ASMALIGN(4) |
14990 | 540 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
541 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
14990 | 542 + "addps %%xmm7, %%xmm0 \n\t" // common |
543 + "movaps %%xmm0, %%xmm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
544 + "addps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
545 + "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
546 + "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
547 + "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
548 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
549 + "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
550 + "add $16, %%"REG_S" \n\t" |
14990 | 551 + " jnz 1b \n\t" |
552 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
553 + : "%"REG_S |
14990 | 554 + ); |
555 +} | |
556 + | |
557 +static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
558 +{ | |
559 + asm volatile( | |
560 + "movlps %1, %%xmm7 \n\t" | |
561 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
562 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 563 + ASMALIGN(4) |
14990 | 564 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
565 + "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
566 + "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" |
14990 | 567 + "addps %%xmm7, %%xmm0 \n\t" // common |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
568 + "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
569 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
570 + "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" |
14990 | 571 + "subps %%xmm2, %%xmm1 \n\t" |
572 + "addps %%xmm2, %%xmm3 \n\t" | |
573 + "addps %%xmm0, %%xmm1 \n\t" | |
574 + "addps %%xmm0, %%xmm3 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
575 + "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
576 + "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
577 + "add $16, %%"REG_S" \n\t" |
14990 | 578 + " jnz 1b \n\t" |
579 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
580 + : "%"REG_S |
14990 | 581 + ); |
582 +} | |
583 + | |
584 +static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
585 +{ | |
586 + asm volatile( | |
587 + "movlps %2, %%xmm7 \n\t" | |
588 + "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
589 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 590 + ASMALIGN(4) |
14990 | 591 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
592 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
593 + "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
594 + "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
595 + "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" |
14990 | 596 + "addps %%xmm7, %%xmm0 \n\t" |
597 + "addps %%xmm7, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
598 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
599 + "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
600 + "add $32, %%"REG_S" \n\t" |
14990 | 601 + " jnz 1b \n\t" |
602 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
603 + : "%"REG_S |
14990 | 604 + ); |
605 +} | |
606 + | |
607 +static void zero_MMX(sample_t * samples) | |
608 +{ | |
609 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
610 + "mov $-1024, %%"REG_S" \n\t" |
14990 | 611 + "pxor %%mm0, %%mm0 \n\t" |
19373 | 612 + ASMALIGN(4) |
14990 | 613 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
614 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
615 + "movq %%mm0, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
616 + "movq %%mm0, 16(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
617 + "movq %%mm0, 24(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
618 + "add $32, %%"REG_S" \n\t" |
14990 | 619 + " jnz 1b \n\t" |
620 + "emms" | |
621 + :: "r" (samples+256) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
622 + : "%"REG_S |
14990 | 623 + ); |
624 +} | |
625 + | |
626 +/* | |
627 + MMX copy loop: each pass issues eight 8-byte loads and eight 8-byte stores (64 bytes). | |
628 + The original author hoped dest and src are at least 8-byte aligned; size is divided by 64, so any remainder is never copied. | |
629 + Note: untested and unused. | |
630 +*/ | |
631 +static void copy_MMX(void *dest,const void *src,unsigned size) | |
632 +{ | |
633 + unsigned i; | |
634 + size /= 64; /* size now counts 64-byte passes */ | |
635 + for(i=0;i<size;i++) | |
636 + { | |
637 + __asm __volatile( | |
638 + "movq %0, %%mm0\n\t" /* loads into mm0..mm7; the leading 8, 16, ... are presumably byte displacements on operand 0 */ | |
639 + "movq 8%0, %%mm1\n\t" | |
640 + "movq 16%0, %%mm2\n\t" | |
641 + "movq 24%0, %%mm3\n\t" | |
642 + "movq 32%0, %%mm4\n\t" | |
643 + "movq 40%0, %%mm5\n\t" | |
644 + "movq 48%0, %%mm6\n\t" | |
645 + "movq 56%0, %%mm7\n\t" | |
646 + "movq %%mm0, %1\n\t" /* stores of mm0..mm7 to operand 1, same displacement scheme */ | |
647 + "movq %%mm1, 8%1\n\t" | |
648 + "movq %%mm2, 16%1\n\t" | |
649 + "movq %%mm3, 24%1\n\t" | |
650 + "movq %%mm4, 32%1\n\t" | |
651 + "movq %%mm5, 40%1\n\t" | |
652 + "movq %%mm6, 48%1\n\t" | |
653 + "movq %%mm7, 56%1\n\t" | |
654 + : /* no output operands are declared, although the asm stores to operand 1 */ | |
655 + :"m"(src),"m"(dest)); /* NOTE(review): both operands are the pointer variables src and dest themselves (m constraint), not the buffers they point to -- confirm */ | |
656 + } /* NOTE(review): src and dest are not advanced between passes and no emms follows the MMX use -- confirm before enabling */ | |
657 +} | |
658 +/* Dispatches on CONVERT (acmod, output & A52_CHANNEL_MASK) and rewrites the 256-sample planes of samples in place through the SSE mix helpers or memcpy. */ | |
659 +static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
660 + sample_t clev, sample_t slev) /* clev is not referenced in the body */ | |
661 +{ | |
662 + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
663 + | |
664 + case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
665 + memcpy (samples, samples + 256, 256 * sizeof (sample_t)); /* plane 1 overwrites plane 0 */ | |
666 + break; | |
667 + | |
668 + case CONVERT (A52_CHANNEL, A52_MONO): | |
669 + case CONVERT (A52_STEREO, A52_MONO): | |
670 + mix_2to1_SSE: /* shared target of the slev == 0 shortcuts below */ | |
671 + mix2to1_SSE (samples, samples + 256, bias); | |
672 + break; | |
673 + | |
674 + case CONVERT (A52_2F1R, A52_MONO): | |
675 + if (slev == 0) | |
676 + goto mix_2to1_SSE; | |
677 + case CONVERT (A52_3F, A52_MONO): /* also entered by fall-through from the case above when slev != 0 */ | |
678 + mix_3to1_SSE: | |
679 + mix3to1_SSE (samples, bias); | |
680 + break; | |
681 + | |
682 + case CONVERT (A52_3F1R, A52_MONO): | |
683 + if (slev == 0) | |
684 + goto mix_3to1_SSE; | |
685 + case CONVERT (A52_2F2R, A52_MONO): /* also entered by fall-through from the case above when slev != 0 */ | |
686 + if (slev == 0) | |
687 + goto mix_2to1_SSE; | |
688 + mix4to1_SSE (samples, bias); | |
689 + break; | |
690 + | |
691 + case CONVERT (A52_3F2R, A52_MONO): | |
692 + if (slev == 0) | |
693 + goto mix_3to1_SSE; | |
694 + mix5to1_SSE (samples, bias); | |
695 + break; | |
696 + | |
697 + case CONVERT (A52_MONO, A52_DOLBY): | |
698 + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); /* plane 0 is duplicated into plane 1 */ | |
699 + break; | |
700 + | |
701 + case CONVERT (A52_3F, A52_STEREO): | |
702 + case CONVERT (A52_3F, A52_DOLBY): | |
703 + mix_3to2_SSE: /* shared target of the slev == 0 shortcuts below */ | |
704 + mix3to2_SSE (samples, bias); | |
705 + break; | |
706 + | |
707 + case CONVERT (A52_2F1R, A52_STEREO): | |
708 + if (slev == 0) | |
709 + break; /* slev == 0: samples is left unchanged for this pair */ | |
710 + mix21to2_SSE (samples, samples + 256, bias); | |
711 + break; | |
712 + | |
713 + case CONVERT (A52_2F1R, A52_DOLBY): | |
714 + mix21toS_SSE (samples, bias); | |
715 + break; | |
716 + | |
717 + case CONVERT (A52_3F1R, A52_STEREO): | |
718 + if (slev == 0) | |
719 + goto mix_3to2_SSE; | |
720 + mix31to2_SSE (samples, bias); | |
721 + break; | |
722 + | |
723 + case CONVERT (A52_3F1R, A52_DOLBY): | |
724 + mix31toS_SSE (samples, bias); | |
725 + break; | |
726 + | |
727 + case CONVERT (A52_2F2R, A52_STEREO): | |
728 + if (slev == 0) | |
729 + break; /* slev == 0: samples is left unchanged for this pair */ | |
730 + mix2to1_SSE (samples, samples + 512, bias); | |
731 + mix2to1_SSE (samples + 256, samples + 768, bias); | |
732 + break; | |
733 + | |
734 + case CONVERT (A52_2F2R, A52_DOLBY): | |
735 + mix22toS_SSE (samples, bias); | |
736 + break; | |
737 + | |
738 + case CONVERT (A52_3F2R, A52_STEREO): | |
739 + if (slev == 0) | |
740 + goto mix_3to2_SSE; | |
741 + mix32to2_SSE (samples, bias); | |
742 + break; | |
743 + | |
744 + case CONVERT (A52_3F2R, A52_DOLBY): | |
745 + mix32toS_SSE (samples, bias); | |
746 + break; | |
747 + | |
748 + case CONVERT (A52_3F1R, A52_3F): | |
749 + if (slev == 0) | |
750 + break; /* slev == 0: samples is left unchanged for this pair */ | |
751 + mix21to2_SSE (samples, samples + 512, bias); /* left = plane 0, right = plane 2 */ | |
752 + break; | |
753 + | |
754 + case CONVERT (A52_3F2R, A52_3F): | |
755 + if (slev == 0) | |
756 + break; /* slev == 0: samples is left unchanged for this pair */ | |
757 + mix2to1_SSE (samples, samples + 768, bias); | |
758 + mix2to1_SSE (samples + 512, samples + 1024, bias); | |
759 + break; | |
760 + | |
761 + case CONVERT (A52_3F1R, A52_2F1R): | |
762 + mix3to2_SSE (samples, bias); | |
763 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); /* plane 3 moves down into plane 2 */ | |
764 + break; | |
765 + | |
766 + case CONVERT (A52_2F2R, A52_2F1R): | |
767 + mix2to1_SSE (samples + 512, samples + 768, bias); | |
768 + break; | |
769 + | |
770 + case CONVERT (A52_3F2R, A52_2F1R): | |
771 + mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
772 + move2to1_SSE (samples + 768, samples + 512, bias); /* src = plane 3, dest = plane 2 */ | |
773 + break; | |
774 + | |
775 + case CONVERT (A52_3F2R, A52_3F1R): | |
776 + mix2to1_SSE (samples + 768, samples + 1024, bias); | |
777 + break; | |
778 + | |
779 + case CONVERT (A52_2F1R, A52_2F2R): | |
780 + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); /* plane 2 is duplicated into plane 3 */ | |
781 + break; | |
782 + | |
783 + case CONVERT (A52_3F1R, A52_2F2R): | |
784 + mix3to2_SSE (samples, bias); | |
785 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); /* plane 3 moves down into plane 2 */ | |
786 + break; | |
787 + | |
788 + case CONVERT (A52_3F2R, A52_2F2R): | |
789 + mix3to2_SSE (samples, bias); | |
790 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); /* planes 3 and 4 each move down one slot */ | |
791 + memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
792 + break; | |
793 + | |
794 + case CONVERT (A52_3F1R, A52_3F2R): | |
795 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); /* plane 3 is duplicated into plane 4 */ | |
796 + break; | |
797 + } /* no default case: any other pair leaves samples untouched */ | |
798 +} | |
799 +/* Dispatches on CONVERT (acmod, output & A52_CHANNEL_MASK); clears 256-sample planes of samples with zero_MMX and relocates planes with memcpy. */ | |
800 +static void upmix_MMX (sample_t * samples, int acmod, int output) | |
801 +{ | |
802 + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
803 + | |
804 + case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
805 + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); /* plane 0 is duplicated into plane 1 */ | |
806 + break; | |
807 + | |
808 + case CONVERT (A52_3F2R, A52_MONO): | |
809 + zero_MMX (samples + 1024); /* fall through: each later group clears one more plane */ | |
810 + case CONVERT (A52_3F1R, A52_MONO): | |
811 + case CONVERT (A52_2F2R, A52_MONO): | |
812 + zero_MMX (samples + 768); /* fall through */ | |
813 + case CONVERT (A52_3F, A52_MONO): | |
814 + case CONVERT (A52_2F1R, A52_MONO): | |
815 + zero_MMX (samples + 512); /* fall through */ | |
816 + case CONVERT (A52_CHANNEL, A52_MONO): | |
817 + case CONVERT (A52_STEREO, A52_MONO): | |
818 + zero_MMX (samples + 256); | |
819 + break; | |
820 + | |
821 + case CONVERT (A52_3F2R, A52_STEREO): | |
822 + case CONVERT (A52_3F2R, A52_DOLBY): | |
823 + zero_MMX (samples + 1024); /* fall through */ | |
824 + case CONVERT (A52_3F1R, A52_STEREO): | |
825 + case CONVERT (A52_3F1R, A52_DOLBY): | |
826 + zero_MMX (samples + 768); /* fall through */ | |
827 + case CONVERT (A52_3F, A52_STEREO): | |
828 + case CONVERT (A52_3F, A52_DOLBY): | |
829 + mix_3to2_MMX: /* also reached by goto from the 2F1R and 2F2R output cases below */ | |
830 + memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); /* plane 1 moves up into plane 2 ... */ | |
831 + zero_MMX (samples + 256); /* ... and plane 1 is then cleared */ | |
832 + break; | |
833 + | |
834 + case CONVERT (A52_2F2R, A52_STEREO): | |
835 + case CONVERT (A52_2F2R, A52_DOLBY): | |
836 + zero_MMX (samples + 768); /* fall through */ | |
837 + case CONVERT (A52_2F1R, A52_STEREO): | |
838 + case CONVERT (A52_2F1R, A52_DOLBY): | |
839 + zero_MMX (samples + 512); | |
840 + break; | |
841 + | |
842 + case CONVERT (A52_3F2R, A52_3F): | |
843 + zero_MMX (samples + 1024); /* fall through */ | |
844 + case CONVERT (A52_3F1R, A52_3F): | |
845 + case CONVERT (A52_2F2R, A52_2F1R): | |
846 + zero_MMX (samples + 768); | |
847 + break; | |
848 + | |
849 + case CONVERT (A52_3F2R, A52_3F1R): | |
850 + zero_MMX (samples + 1024); | |
851 + break; | |
852 + | |
853 + case CONVERT (A52_3F2R, A52_2F1R): | |
854 + zero_MMX (samples + 1024); /* fall through */ | |
855 + case CONVERT (A52_3F1R, A52_2F1R): | |
856 + mix_31to21_MMX: /* also reached by goto from the last case */ | |
857 + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); /* plane 2 moves up into plane 3 before plane 1 is moved */ | |
858 + goto mix_3to2_MMX; | |
859 + | |
860 + case CONVERT (A52_3F2R, A52_2F2R): | |
861 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); /* plane 3 moves up into plane 4 first */ | |
862 + goto mix_31to21_MMX; | |
863 + } /* no default case: any other pair leaves samples untouched */ | |
864 +} | |
865 + | |
866 +static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
867 +{ | |
868 + asm volatile( | |
869 + "movd %2, %%mm7 \n\t" | |
870 + "punpckldq %2, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
871 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 872 + ASMALIGN(4) |
14990 | 873 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
874 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
875 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
876 + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
877 + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
878 + "pfadd (%1, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
879 + "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
880 + "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
881 + "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" |
14990 | 882 + "pfadd %%mm7, %%mm0 \n\t" |
883 + "pfadd %%mm7, %%mm1 \n\t" | |
884 + "pfadd %%mm7, %%mm2 \n\t" | |
885 + "pfadd %%mm7, %%mm3 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
886 + "movq %%mm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
887 + "movq %%mm1, 8(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
888 + "movq %%mm2, 16(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
889 + "movq %%mm3, 24(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
890 + "add $32, %%"REG_S" \n\t" |
14990 | 891 + " jnz 1b \n\t" |
892 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
893 + : "%"REG_S |
14990 | 894 + ); |
895 +} | |
896 + | |
897 +static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
898 +{ | |
899 + asm volatile( | |
900 + "movd %1, %%mm7 \n\t" | |
901 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
902 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 903 + ASMALIGN(4) |
14990 | 904 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
905 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
906 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
907 + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
908 + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
909 + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
910 + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 911 + "pfadd %%mm7, %%mm0 \n\t" |
912 + "pfadd %%mm7, %%mm1 \n\t" | |
913 + "pfadd %%mm2, %%mm0 \n\t" | |
914 + "pfadd %%mm3, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
915 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
916 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
917 + "add $16, %%"REG_S" \n\t" |
14990 | 918 + " jnz 1b \n\t" |
919 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
920 + : "%"REG_S |
14990 | 921 + ); |
922 +} | |
923 + | |
924 +static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
925 +{ | |
926 + asm volatile( | |
927 + "movd %1, %%mm7 \n\t" | |
928 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
929 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 930 + ASMALIGN(4) |
14990 | 931 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
932 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
933 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
934 + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
935 + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
936 + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
937 + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
938 + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
939 + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 940 + "pfadd %%mm7, %%mm0 \n\t" |
941 + "pfadd %%mm7, %%mm1 \n\t" | |
942 + "pfadd %%mm2, %%mm0 \n\t" | |
943 + "pfadd %%mm3, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
944 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
945 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
946 + "add $16, %%"REG_S" \n\t" |
14990 | 947 + " jnz 1b \n\t" |
948 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
949 + : "%"REG_S |
14990 | 950 + ); |
951 +} | |
952 + | |
953 +static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
954 +{ | |
955 + asm volatile( | |
956 + "movd %1, %%mm7 \n\t" | |
957 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
958 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 959 + ASMALIGN(4) |
14990 | 960 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
961 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
962 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
963 + "movq 1024(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
964 + "movq 1032(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
965 + "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
966 + "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
967 + "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
968 + "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 969 + "pfadd %%mm7, %%mm0 \n\t" |
970 + "pfadd %%mm7, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
971 + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
972 + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 973 + "pfadd %%mm2, %%mm0 \n\t" |
974 + "pfadd %%mm3, %%mm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
975 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
976 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
977 + "add $16, %%"REG_S" \n\t" |
14990 | 978 + " jnz 1b \n\t" |
979 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
980 + : "%"REG_S |
14990 | 981 + ); |
982 +} | |
983 + | |
984 +static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
985 +{ | |
986 + asm volatile( | |
987 + "movd %1, %%mm7 \n\t" | |
988 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
989 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 990 + ASMALIGN(4) |
14990 | 991 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
992 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
993 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 994 + "pfadd %%mm7, %%mm0 \n\t" //common |
995 + "pfadd %%mm7, %%mm1 \n\t" //common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
996 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
997 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
998 + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
999 + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1000 + "pfadd %%mm0, %%mm2 \n\t" |
1001 + "pfadd %%mm1, %%mm3 \n\t" | |
1002 + "pfadd %%mm0, %%mm4 \n\t" | |
1003 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1004 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1005 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1006 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1007 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1008 + "add $16, %%"REG_S" \n\t" |
14990 | 1009 + " jnz 1b \n\t" |
1010 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1011 + : "%"REG_S |
14990 | 1012 + ); |
1013 +} | |
1014 + | |
1015 +static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1016 +{ | |
1017 + asm volatile( | |
1018 + "movd %2, %%mm7 \n\t" | |
1019 + "punpckldq %2, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1020 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1021 + ASMALIGN(4) |
14990 | 1022 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1023 + "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1024 + "movq 1032(%1, %%"REG_S"), %%mm1\n\t" |
14990 | 1025 + "pfadd %%mm7, %%mm0 \n\t" //common |
1026 + "pfadd %%mm7, %%mm1 \n\t" //common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1027 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1028 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1029 + "movq (%1, %%"REG_S"), %%mm4 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1030 + "movq 8(%1, %%"REG_S"), %%mm5 \n\t" |
14990 | 1031 + "pfadd %%mm0, %%mm2 \n\t" |
1032 + "pfadd %%mm1, %%mm3 \n\t" | |
1033 + "pfadd %%mm0, %%mm4 \n\t" | |
1034 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1035 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1036 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1037 + "movq %%mm4, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1038 + "movq %%mm5, 8(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1039 + "add $16, %%"REG_S" \n\t" |
14990 | 1040 + " jnz 1b \n\t" |
1041 + :: "r" (left+256), "r" (right+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1042 + : "%"REG_S |
14990 | 1043 + ); |
1044 +} | |
1045 + | |
1046 +static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1047 +{ | |
1048 + asm volatile( | |
1049 + "movd %1, %%mm7 \n\t" | |
1050 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1051 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1052 + ASMALIGN(4) |
14990 | 1053 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1054 + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1055 + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1056 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1057 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1058 + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1059 + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1060 + "pfadd %%mm7, %%mm2 \n\t" |
1061 + "pfadd %%mm7, %%mm3 \n\t" | |
1062 + "pfadd %%mm7, %%mm4 \n\t" | |
1063 + "pfadd %%mm7, %%mm5 \n\t" | |
1064 + "pfsub %%mm0, %%mm2 \n\t" | |
1065 + "pfsub %%mm1, %%mm3 \n\t" | |
1066 + "pfadd %%mm0, %%mm4 \n\t" | |
1067 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1068 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1069 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1070 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1071 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1072 + "add $16, %%"REG_S" \n\t" |
14990 | 1073 + " jnz 1b \n\t" |
1074 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1075 + : "%"REG_S |
14990 | 1076 + ); |
1077 +} | |
1078 + | |
1079 +static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1080 +{ | |
1081 + asm volatile( | |
1082 + "movd %1, %%mm7 \n\t" | |
1083 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1084 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1085 + ASMALIGN(4) |
14990 | 1086 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1087 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1088 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1089 + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1090 + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 1091 + "pfadd %%mm7, %%mm0 \n\t" // common |
1092 + "pfadd %%mm7, %%mm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1093 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1094 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1095 + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1096 + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1097 + "pfadd %%mm0, %%mm2 \n\t" |
1098 + "pfadd %%mm1, %%mm3 \n\t" | |
1099 + "pfadd %%mm0, %%mm4 \n\t" | |
1100 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1101 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1102 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1103 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1104 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1105 + "add $16, %%"REG_S" \n\t" |
14990 | 1106 + " jnz 1b \n\t" |
1107 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1108 + : "%"REG_S |
14990 | 1109 + ); |
1110 +} | |
1111 + | |
1112 +static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1113 +{ | |
1114 + asm volatile( | |
1115 + "movd %1, %%mm7 \n\t" | |
1116 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1117 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1118 + ASMALIGN(4) |
14990 | 1119 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1120 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1121 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 1122 + "pfadd %%mm7, %%mm0 \n\t" // common |
1123 + "pfadd %%mm7, %%mm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1124 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1125 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1126 + "movq 2048(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1127 + "movq 2056(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1128 + "pfadd %%mm0, %%mm2 \n\t" |
1129 + "pfadd %%mm1, %%mm3 \n\t" | |
1130 + "pfadd %%mm0, %%mm4 \n\t" | |
1131 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1132 + "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1133 + "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround |
14990 | 1134 + "pfsub %%mm0, %%mm2 \n\t" |
1135 + "pfsub %%mm1, %%mm3 \n\t" | |
1136 + "pfadd %%mm0, %%mm4 \n\t" | |
1137 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1138 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1139 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1140 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1141 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1142 + "add $16, %%"REG_S" \n\t" |
14990 | 1143 + " jnz 1b \n\t" |
1144 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1145 + : "%"REG_S |
14990 | 1146 + ); |
1147 +} | |
1148 + | |
1149 +static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1150 +{ | |
1151 + asm volatile( | |
1152 + "movd %1, %%mm7 \n\t" | |
1153 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1154 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1155 + ASMALIGN(4) |
14990 | 1156 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1157 + "movq 2048(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1158 + "movq 2056(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1159 + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1160 + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1161 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1162 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1163 + "movq 1024(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1164 + "movq 1032(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1165 + "pfadd %%mm7, %%mm2 \n\t" |
1166 + "pfadd %%mm7, %%mm3 \n\t" | |
1167 + "pfadd %%mm7, %%mm4 \n\t" | |
1168 + "pfadd %%mm7, %%mm5 \n\t" | |
1169 + "pfsub %%mm0, %%mm2 \n\t" | |
1170 + "pfsub %%mm1, %%mm3 \n\t" | |
1171 + "pfadd %%mm0, %%mm4 \n\t" | |
1172 + "pfadd %%mm1, %%mm5 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1173 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1174 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1175 + "movq %%mm4, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1176 + "movq %%mm5, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1177 + "add $16, %%"REG_S" \n\t" |
14990 | 1178 + " jnz 1b \n\t" |
1179 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1180 + : "%"REG_S |
14990 | 1181 + ); |
1182 +} | |
1183 + | |
1184 +static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1185 +{ | |
1186 + asm volatile( | |
1187 + "movd %1, %%mm7 \n\t" | |
1188 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1189 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1190 + ASMALIGN(4) |
14990 | 1191 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1192 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1193 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
14990 | 1194 + "pfadd %%mm7, %%mm0 \n\t" // common |
1195 + "pfadd %%mm7, %%mm1 \n\t" // common | |
1196 + "movq %%mm0, %%mm2 \n\t" // common | |
1197 + "movq %%mm1, %%mm3 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1198 + "pfadd (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1199 + "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1200 + "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1201 + "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1202 + "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1203 + "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1204 + "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1205 + "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1206 + "movq %%mm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1207 + "movq %%mm1, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1208 + "movq %%mm2, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1209 + "movq %%mm3, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1210 + "add $16, %%"REG_S" \n\t" |
14990 | 1211 + " jnz 1b \n\t" |
1212 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1213 + : "%"REG_S |
14990 | 1214 + ); |
1215 +} | |
1216 + | |
1217 +/* todo: should be optimized better */ | |
1218 +static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1219 +{ | |
1220 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1221 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1222 + ASMALIGN(4) |
14990 | 1223 + "1: \n\t" |
1224 + "movd %1, %%mm7 \n\t" | |
1225 + "punpckldq %1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1226 + "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1227 + "movq 1032(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1228 + "movq 3072(%0, %%"REG_S"), %%mm4\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1229 + "movq 3080(%0, %%"REG_S"), %%mm5\n\t" |
14990 | 1230 + "pfadd %%mm7, %%mm0 \n\t" // common |
1231 + "pfadd %%mm7, %%mm1 \n\t" // common | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1232 + "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1233 + "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1234 + "movq (%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1235 + "movq 8(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1236 + "movq 2048(%0, %%"REG_S"), %%mm6\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1237 + "movq 2056(%0, %%"REG_S"), %%mm7\n\t" |
14990 | 1238 + "pfsub %%mm4, %%mm2 \n\t" |
1239 + "pfsub %%mm5, %%mm3 \n\t" | |
1240 + "pfadd %%mm4, %%mm6 \n\t" | |
1241 + "pfadd %%mm5, %%mm7 \n\t" | |
1242 + "pfadd %%mm0, %%mm2 \n\t" | |
1243 + "pfadd %%mm1, %%mm3 \n\t" | |
1244 + "pfadd %%mm0, %%mm6 \n\t" | |
1245 + "pfadd %%mm1, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1246 + "movq %%mm2, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1247 + "movq %%mm3, 8(%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1248 + "movq %%mm6, 1024(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1249 + "movq %%mm7, 1032(%0, %%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1250 + "add $16, %%"REG_S" \n\t" |
14990 | 1251 + " jnz 1b \n\t" |
1252 + :: "r" (samples+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1253 + : "%"REG_S |
14990 | 1254 + ); |
1255 +} | |
1256 + | |
1257 +static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1258 +{ | |
1259 + asm volatile( | |
1260 + "movd %2, %%mm7 \n\t" | |
1261 + "punpckldq %2, %%mm7 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1262 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 1263 + ASMALIGN(4) |
14990 | 1264 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1265 + "movq (%0, %%"REG_S"), %%mm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1266 + "movq 8(%0, %%"REG_S"), %%mm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1267 + "movq 16(%0, %%"REG_S"), %%mm2 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1268 + "movq 24(%0, %%"REG_S"), %%mm3 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1269 + "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1270 + "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1271 + "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1272 + "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" |
14990 | 1273 + "pfadd %%mm7, %%mm0 \n\t" |
1274 + "pfadd %%mm7, %%mm1 \n\t" | |
1275 + "pfadd %%mm7, %%mm2 \n\t" | |
1276 + "pfadd %%mm7, %%mm3 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1277 + "movq %%mm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1278 + "movq %%mm1, 8(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1279 + "movq %%mm2, 16(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1280 + "movq %%mm3, 24(%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1281 + "add $32, %%"REG_S" \n\t" |
14990 | 1282 + " jnz 1b \n\t" |
1283 + :: "r" (src+256), "r" (dest+256), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1284 + : "%"REG_S |
14990 | 1285 + ); |
1286 +} | |
1287 + | |
1288 +static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1289 + sample_t clev, sample_t slev) | |
1290 +{ | |
1291 + switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1292 + | |
1293 + case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1294 + memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1295 + break; | |
1296 + | |
1297 + case CONVERT (A52_CHANNEL, A52_MONO): | |
1298 + case CONVERT (A52_STEREO, A52_MONO): | |
1299 + mix_2to1_3dnow: | |
1300 + mix2to1_3dnow (samples, samples + 256, bias); | |
1301 + break; | |
1302 + | |
1303 + case CONVERT (A52_2F1R, A52_MONO): | |
1304 + if (slev == 0) | |
1305 + goto mix_2to1_3dnow; | |
1306 + case CONVERT (A52_3F, A52_MONO): | |
1307 + mix_3to1_3dnow: | |
1308 + mix3to1_3dnow (samples, bias); | |
1309 + break; | |
1310 + | |
1311 + case CONVERT (A52_3F1R, A52_MONO): | |
1312 + if (slev == 0) | |
1313 + goto mix_3to1_3dnow; | |
1314 + case CONVERT (A52_2F2R, A52_MONO): | |
1315 + if (slev == 0) | |
1316 + goto mix_2to1_3dnow; | |
1317 + mix4to1_3dnow (samples, bias); | |
1318 + break; | |
1319 + | |
1320 + case CONVERT (A52_3F2R, A52_MONO): | |
1321 + if (slev == 0) | |
1322 + goto mix_3to1_3dnow; | |
1323 + mix5to1_3dnow (samples, bias); | |
1324 + break; | |
1325 + | |
1326 + case CONVERT (A52_MONO, A52_DOLBY): | |
1327 + memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1328 + break; | |
1329 + | |
1330 + case CONVERT (A52_3F, A52_STEREO): | |
1331 + case CONVERT (A52_3F, A52_DOLBY): | |
1332 + mix_3to2_3dnow: | |
1333 + mix3to2_3dnow (samples, bias); | |
1334 + break; | |
1335 + | |
1336 + case CONVERT (A52_2F1R, A52_STEREO): | |
1337 + if (slev == 0) | |
1338 + break; | |
1339 + mix21to2_3dnow (samples, samples + 256, bias); | |
1340 + break; | |
1341 + | |
1342 + case CONVERT (A52_2F1R, A52_DOLBY): | |
1343 + mix21toS_3dnow (samples, bias); | |
1344 + break; | |
1345 + | |
1346 + case CONVERT (A52_3F1R, A52_STEREO): | |
1347 + if (slev == 0) | |
1348 + goto mix_3to2_3dnow; | |
1349 + mix31to2_3dnow (samples, bias); | |
1350 + break; | |
1351 + | |
1352 + case CONVERT (A52_3F1R, A52_DOLBY): | |
1353 + mix31toS_3dnow (samples, bias); | |
1354 + break; | |
1355 + | |
1356 + case CONVERT (A52_2F2R, A52_STEREO): | |
1357 + if (slev == 0) | |
1358 + break; | |
1359 + mix2to1_3dnow (samples, samples + 512, bias); | |
1360 + mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1361 + break; | |
1362 + | |
1363 + case CONVERT (A52_2F2R, A52_DOLBY): | |
1364 + mix22toS_3dnow (samples, bias); | |
1365 + break; | |
1366 + | |
1367 + case CONVERT (A52_3F2R, A52_STEREO): | |
1368 + if (slev == 0) | |
1369 + goto mix_3to2_3dnow; | |
1370 + mix32to2_3dnow (samples, bias); | |
1371 + break; | |
1372 + | |
1373 + case CONVERT (A52_3F2R, A52_DOLBY): | |
1374 + mix32toS_3dnow (samples, bias); | |
1375 + break; | |
1376 + | |
1377 + case CONVERT (A52_3F1R, A52_3F): | |
1378 + if (slev == 0) | |
1379 + break; | |
1380 + mix21to2_3dnow (samples, samples + 512, bias); | |
1381 + break; | |
1382 + | |
1383 + case CONVERT (A52_3F2R, A52_3F): | |
1384 + if (slev == 0) | |
1385 + break; | |
1386 + mix2to1_3dnow (samples, samples + 768, bias); | |
1387 + mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1388 + break; | |
1389 + | |
1390 + case CONVERT (A52_3F1R, A52_2F1R): | |
1391 + mix3to2_3dnow (samples, bias); | |
1392 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1393 + break; | |
1394 + | |
1395 + case CONVERT (A52_2F2R, A52_2F1R): | |
1396 + mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1397 + break; | |
1398 + | |
1399 + case CONVERT (A52_3F2R, A52_2F1R): | |
1400 + mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1401 + move2to1_3dnow (samples + 768, samples + 512, bias); | |
1402 + break; | |
1403 + | |
1404 + case CONVERT (A52_3F2R, A52_3F1R): | |
1405 + mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1406 + break; | |
1407 + | |
1408 + case CONVERT (A52_2F1R, A52_2F2R): | |
1409 + memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1410 + break; | |
1411 + | |
1412 + case CONVERT (A52_3F1R, A52_2F2R): | |
1413 + mix3to2_3dnow (samples, bias); | |
1414 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1415 + break; | |
1416 + | |
1417 + case CONVERT (A52_3F2R, A52_2F2R): | |
1418 + mix3to2_3dnow (samples, bias); | |
1419 + memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1420 + memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1421 + break; | |
1422 + | |
1423 + case CONVERT (A52_3F1R, A52_3F2R): | |
1424 + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1425 + break; | |
1426 + } | |
1427 + __asm __volatile("femms":::"memory"); | |
1428 +} | |
1429 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1430 +#endif // ARCH_X86 || ARCH_X86_64 |
25995 | 1431 --- liba52/imdct.c 2008-02-19 00:18:33.000000000 +0100 |
1432 +++ liba52/imdct.c 2008-02-19 00:16:40.000000000 +0100 | |
1433 @@ -22,6 +26,11 @@ | |
14990 | 1434 * You should have received a copy of the GNU General Public License |
1435 * along with this program; if not, write to the Free Software | |
1436 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
1437 + * | |
1438 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
1439 + * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru> | |
1440 + * michael did port them from libac3 (untested, perhaps totally broken) | |
1441 + * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) | |
1442 */ | |
1443 | |
1444 #include "config.h" | |
19373 | 1445 @@ -39,12 +48,49 @@ |
14990 | 1446 #include "a52.h" |
1447 #include "a52_internal.h" | |
1448 #include "mm_accel.h" | |
1449 +#include "mangle.h" | |
1450 + | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1451 +void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1452 + |
14990 | 1453 +#ifdef RUNTIME_CPUDETECT |
1454 +#undef HAVE_3DNOWEX | |
1455 +#endif | |
1456 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1457 typedef struct complex_s { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1458 sample_t real; |
14990 | 1459 sample_t imag; |
1460 } complex_t; | |
1461 | |
1462 +static const int pm128[128] attribute_used __attribute__((aligned(16))) = | |
1463 +{ | |
1464 + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, | |
1465 + 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, | |
1466 + 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, | |
1467 + 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, | |
1468 + 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, | |
1469 + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, | |
1470 + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, | |
1471 + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 | |
1472 +}; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1473 + |
14990 | 1474 +static uint8_t attribute_used bit_reverse_512[] = { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1475 + 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1476 + 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1477 + 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1478 + 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1479 + 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1480 + 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1481 + 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1482 + 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1483 + 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1484 + 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1485 + 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1486 + 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1487 + 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1488 + 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1489 + 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1490 + 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1491 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1492 static uint8_t fftorder[] = { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1493 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1494 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, |
19373 | 1495 @@ -56,6 +102,40 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1496 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1497 }; |
14990 | 1498 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1499 +static complex_t __attribute__((aligned(16))) buf[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1500 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1501 +/* Twiddle factor LUT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1502 +static complex_t __attribute__((aligned(16))) w_1[1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1503 +static complex_t __attribute__((aligned(16))) w_2[2]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1504 +static complex_t __attribute__((aligned(16))) w_4[4]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1505 +static complex_t __attribute__((aligned(16))) w_8[8]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1506 +static complex_t __attribute__((aligned(16))) w_16[16]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1507 +static complex_t __attribute__((aligned(16))) w_32[32]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1508 +static complex_t __attribute__((aligned(16))) w_64[64]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1509 +static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64}; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1510 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1511 +/* Twiddle factors for IMDCT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1512 +static sample_t __attribute__((aligned(16))) xcos1[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1513 +static sample_t __attribute__((aligned(16))) xsin1[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1514 + |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1515 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 1516 +// NOTE: SSE needs 16byte alignment or it will segfault |
1517 +// | |
1518 +static float __attribute__((aligned(16))) sseSinCos1c[256]; | |
1519 +static float __attribute__((aligned(16))) sseSinCos1d[256]; | |
1520 +static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; | |
1521 +//static float __attribute__((aligned(16))) sseW0[4]; | |
1522 +static float __attribute__((aligned(16))) sseW1[8]; | |
1523 +static float __attribute__((aligned(16))) sseW2[16]; | |
1524 +static float __attribute__((aligned(16))) sseW3[32]; | |
1525 +static float __attribute__((aligned(16))) sseW4[64]; | |
1526 +static float __attribute__((aligned(16))) sseW5[128]; | |
1527 +static float __attribute__((aligned(16))) sseW6[256]; | |
1528 +static float __attribute__((aligned(16))) *sseW[7]= | |
1529 + {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; | |
1530 +static float __attribute__((aligned(16))) sseWindow[512]; | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1531 +#endif |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1532 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1533 /* Root values for IFFT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1534 static sample_t roots16[3]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1535 static sample_t roots32[7]; |
19373 | 1536 @@ -241,7 +321,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1537 ifft_pass (buf, roots128 - 32, 32); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1538 } |
14990 | 1539 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1540 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1541 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1542 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1543 int i, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1544 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; |
25995 | 1545 @@ -285,6 +365,707 @@ |
14990 | 1546 } |
1547 } | |
1548 | |
1549 +#ifdef HAVE_ALTIVEC | |
1550 + | |
25328
6f0309e575e0
There is a check for altivec.h in configure so use the preprocessor directive
diego
parents:
25327
diff
changeset
|
1551 +#ifdef HAVE_ALTIVEC_H |
14990 | 1552 +#include <altivec.h> |
1553 +#endif | |
1554 + | |
1555 +// used to build registers permutation vectors (vcprm) | |
1556 +// the 's' are for words in the _s_econd vector | |
1557 +#define WORD_0 0x00,0x01,0x02,0x03 | |
1558 +#define WORD_1 0x04,0x05,0x06,0x07 | |
1559 +#define WORD_2 0x08,0x09,0x0a,0x0b | |
1560 +#define WORD_3 0x0c,0x0d,0x0e,0x0f | |
1561 +#define WORD_s0 0x10,0x11,0x12,0x13 | |
1562 +#define WORD_s1 0x14,0x15,0x16,0x17 | |
1563 +#define WORD_s2 0x18,0x19,0x1a,0x1b | |
1564 +#define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
1565 + | |
25327
a7b716b53e9f
Replace SYS_DARWIN conditional directive around gcc macros by __APPLE_CC__.
diego
parents:
23873
diff
changeset
|
1566 +#ifdef __APPLE_CC__ |
25995 | 1567 +#define AVV(x...) (x) |
14990 | 1568 +#else |
25995 | 1569 +#define AVV(x...) {x} |
14990 | 1570 +#endif |
1571 + | |
25995 | 1572 +#define vcprm(a,b,c,d) (const vector unsigned char)AVV(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) |
1573 +#define vcii(a,b,c,d) (const vector float)AVV(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) | |
1574 + | |
1575 +#define FOUROF(a) AVV(a,a,a,a) | |
1576 + | |
14990 | 1577 +// vcprmle is used to keep the same index as in the SSE version. |
1578 +// it's the same as vcprm, with the index inversed | |
1579 +// ('le' is Little Endian) | |
1580 +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
1581 + | |
1582 +// used to build inverse/identity vectors (vcii) | |
1583 +// n is _n_egative, p is _p_ositive | |
1584 +#define FLOAT_n -1. | |
1585 +#define FLOAT_p 1. | |
1586 + | |
1587 + | |
1588 +void | |
1589 +imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) | |
1590 +{ | |
1591 + int i; | |
1592 + int k; | |
1593 + int p,q; | |
1594 + int m; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1595 + long two_m; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1596 + long two_m_plus_one; |
14990 | 1597 + |
1598 + sample_t tmp_b_i; | |
1599 + sample_t tmp_b_r; | |
1600 + sample_t tmp_a_i; | |
1601 + sample_t tmp_a_r; | |
1602 + | |
1603 + sample_t *data_ptr; | |
1604 + sample_t *delay_ptr; | |
1605 + sample_t *window_ptr; | |
1606 + | |
1607 + /* 512 IMDCT with source and dest data in 'data' */ | |
1608 + | |
1609 + /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ | |
1610 + for( i=0; i < 128; i++) { | |
1611 + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ | |
1612 + int j= bit_reverse_512[i]; | |
1613 + buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); | |
1614 + buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); | |
1615 + } | |
1616 + | |
1617 + /* 1. iteration */ | |
1618 + for(i = 0; i < 128; i += 2) { | |
1619 +#if 0 | |
1620 + tmp_a_r = buf[i].real; | |
1621 + tmp_a_i = buf[i].imag; | |
1622 + tmp_b_r = buf[i+1].real; | |
1623 + tmp_b_i = buf[i+1].imag; | |
1624 + buf[i].real = tmp_a_r + tmp_b_r; | |
1625 + buf[i].imag = tmp_a_i + tmp_b_i; | |
1626 + buf[i+1].real = tmp_a_r - tmp_b_r; | |
1627 + buf[i+1].imag = tmp_a_i - tmp_b_i; | |
1628 +#else | |
1629 + vector float temp, bufv; | |
1630 + | |
1631 + bufv = vec_ld(i << 3, (float*)buf); | |
1632 + temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); | |
1633 + bufv = vec_madd(bufv, vcii(p,p,n,n), temp); | |
1634 + vec_st(bufv, i << 3, (float*)buf); | |
1635 +#endif | |
1636 + } | |
1637 + | |
1638 + /* 2. iteration */ | |
1639 + // Note w[1]={{1,0}, {0,-1}} | |
1640 + for(i = 0; i < 128; i += 4) { | |
1641 +#if 0 | |
1642 + tmp_a_r = buf[i].real; | |
1643 + tmp_a_i = buf[i].imag; | |
1644 + tmp_b_r = buf[i+2].real; | |
1645 + tmp_b_i = buf[i+2].imag; | |
1646 + buf[i].real = tmp_a_r + tmp_b_r; | |
1647 + buf[i].imag = tmp_a_i + tmp_b_i; | |
1648 + buf[i+2].real = tmp_a_r - tmp_b_r; | |
1649 + buf[i+2].imag = tmp_a_i - tmp_b_i; | |
1650 + tmp_a_r = buf[i+1].real; | |
1651 + tmp_a_i = buf[i+1].imag; | |
1652 + /* WARNING: im <-> re here ! */ | |
1653 + tmp_b_r = buf[i+3].imag; | |
1654 + tmp_b_i = buf[i+3].real; | |
1655 + buf[i+1].real = tmp_a_r + tmp_b_r; | |
1656 + buf[i+1].imag = tmp_a_i - tmp_b_i; | |
1657 + buf[i+3].real = tmp_a_r - tmp_b_r; | |
1658 + buf[i+3].imag = tmp_a_i + tmp_b_i; | |
1659 +#else | |
1660 + vector float buf01, buf23, temp1, temp2; | |
1661 + | |
1662 + buf01 = vec_ld((i + 0) << 3, (float*)buf); | |
1663 + buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
1664 + buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); | |
1665 + | |
1666 + temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01); | |
1667 + temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01); | |
1668 + | |
1669 + vec_st(temp1, (i + 0) << 3, (float*)buf); | |
1670 + vec_st(temp2, (i + 2) << 3, (float*)buf); | |
1671 +#endif | |
1672 + } | |
1673 + | |
1674 + /* 3. iteration */ | |
1675 + for(i = 0; i < 128; i += 8) { | |
1676 +#if 0 | |
1677 + tmp_a_r = buf[i].real; | |
1678 + tmp_a_i = buf[i].imag; | |
1679 + tmp_b_r = buf[i+4].real; | |
1680 + tmp_b_i = buf[i+4].imag; | |
1681 + buf[i].real = tmp_a_r + tmp_b_r; | |
1682 + buf[i].imag = tmp_a_i + tmp_b_i; | |
1683 + buf[i+4].real = tmp_a_r - tmp_b_r; | |
1684 + buf[i+4].imag = tmp_a_i - tmp_b_i; | |
1685 + tmp_a_r = buf[1+i].real; | |
1686 + tmp_a_i = buf[1+i].imag; | |
1687 + tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; | |
1688 + tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; | |
1689 + buf[1+i].real = tmp_a_r + tmp_b_r; | |
1690 + buf[1+i].imag = tmp_a_i + tmp_b_i; | |
1691 + buf[i+5].real = tmp_a_r - tmp_b_r; | |
1692 + buf[i+5].imag = tmp_a_i - tmp_b_i; | |
1693 + tmp_a_r = buf[i+2].real; | |
1694 + tmp_a_i = buf[i+2].imag; | |
1695 + /* WARNING re <-> im & sign */ | |
1696 + tmp_b_r = buf[i+6].imag; | |
1697 + tmp_b_i = - buf[i+6].real; | |
1698 + buf[i+2].real = tmp_a_r + tmp_b_r; | |
1699 + buf[i+2].imag = tmp_a_i + tmp_b_i; | |
1700 + buf[i+6].real = tmp_a_r - tmp_b_r; | |
1701 + buf[i+6].imag = tmp_a_i - tmp_b_i; | |
1702 + tmp_a_r = buf[i+3].real; | |
1703 + tmp_a_i = buf[i+3].imag; | |
1704 + tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; | |
1705 + tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; | |
1706 + buf[i+3].real = tmp_a_r + tmp_b_r; | |
1707 + buf[i+3].imag = tmp_a_i + tmp_b_i; | |
1708 + buf[i+7].real = tmp_a_r - tmp_b_r; | |
1709 + buf[i+7].imag = tmp_a_i - tmp_b_i; | |
1710 +#else | |
1711 + vector float buf01, buf23, buf45, buf67; | |
1712 + | |
1713 + buf01 = vec_ld((i + 0) << 3, (float*)buf); | |
1714 + buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
1715 + | |
1716 + tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; | |
1717 + tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; | |
1718 + buf[i+5].real = tmp_b_r; | |
1719 + buf[i+5].imag = tmp_b_i; | |
1720 + tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; | |
1721 + tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; | |
1722 + buf[i+7].real = tmp_b_r; | |
1723 + buf[i+7].imag = tmp_b_i; | |
1724 + | |
1725 + buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
1726 + buf45 = vec_ld((i + 4) << 3, (float*)buf); | |
1727 + buf67 = vec_ld((i + 6) << 3, (float*)buf); | |
1728 + buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); | |
1729 + | |
1730 + vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); | |
1731 + vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); | |
1732 + vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); | |
1733 + vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); | |
1734 +#endif | |
1735 + } | |
1736 + | |
1737 + /* 4-7. iterations */ | |
1738 + for (m=3; m < 7; m++) { | |
1739 + two_m = (1 << m); | |
1740 + | |
1741 + two_m_plus_one = two_m<<1; | |
1742 + | |
1743 + for(i = 0; i < 128; i += two_m_plus_one) { | |
1744 + for(k = 0; k < two_m; k+=2) { | |
1745 +#if 0 | |
1746 + int p = k + i; | |
1747 + int q = p + two_m; | |
1748 + tmp_a_r = buf[p].real; | |
1749 + tmp_a_i = buf[p].imag; | |
1750 + tmp_b_r = | |
1751 + buf[q].real * w[m][k].real - | |
1752 + buf[q].imag * w[m][k].imag; | |
1753 + tmp_b_i = | |
1754 + buf[q].imag * w[m][k].real + | |
1755 + buf[q].real * w[m][k].imag; | |
1756 + buf[p].real = tmp_a_r + tmp_b_r; | |
1757 + buf[p].imag = tmp_a_i + tmp_b_i; | |
1758 + buf[q].real = tmp_a_r - tmp_b_r; | |
1759 + buf[q].imag = tmp_a_i - tmp_b_i; | |
1760 + | |
1761 + tmp_a_r = buf[(p + 1)].real; | |
1762 + tmp_a_i = buf[(p + 1)].imag; | |
1763 + tmp_b_r = | |
1764 + buf[(q + 1)].real * w[m][(k + 1)].real - | |
1765 + buf[(q + 1)].imag * w[m][(k + 1)].imag; | |
1766 + tmp_b_i = | |
1767 + buf[(q + 1)].imag * w[m][(k + 1)].real + | |
1768 + buf[(q + 1)].real * w[m][(k + 1)].imag; | |
1769 + buf[(p + 1)].real = tmp_a_r + tmp_b_r; | |
1770 + buf[(p + 1)].imag = tmp_a_i + tmp_b_i; | |
1771 + buf[(q + 1)].real = tmp_a_r - tmp_b_r; | |
1772 + buf[(q + 1)].imag = tmp_a_i - tmp_b_i; | |
1773 +#else | |
1774 + int p = k + i; | |
1775 + int q = p + two_m; | |
1776 + vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4; | |
1777 + const vector float vczero = (const vector float)FOUROF(0.); | |
1778 + // first compute buf[q] and buf[q+1] | |
1779 + vecq = vec_ld(q << 3, (float*)buf); | |
1780 + vecw = vec_ld(0, (float*)&(w[m][k])); | |
1781 + temp1 = vec_madd(vecq, vecw, vczero); | |
1782 + temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2)); | |
1783 + temp2 = vec_madd(temp2, vecw, vczero); | |
1784 + temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2)); | |
1785 + temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3)); | |
1786 + vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); | |
1787 + // then butterfly with buf[p] and buf[p+1] | |
1788 + vecp = vec_ld(p << 3, (float*)buf); | |
1789 + | |
1790 + temp1 = vec_add(vecp, vecq); | |
1791 + temp2 = vec_sub(vecp, vecq); | |
1792 + | |
1793 + vec_st(temp1, p << 3, (float*)buf); | |
1794 + vec_st(temp2, q << 3, (float*)buf); | |
1795 +#endif | |
1796 + } | |
1797 + } | |
1798 + } | |
1799 + | |
1800 + /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
1801 + for( i=0; i < 128; i+=4) { | |
1802 + /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | |
1803 +#if 0 | |
1804 + tmp_a_r = buf[(i + 0)].real; | |
1805 + tmp_a_i = -1.0 * buf[(i + 0)].imag; | |
1806 + buf[(i + 0)].real = | |
1807 + (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]); | |
1808 + buf[(i + 0)].imag = | |
1809 + (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]); | |
1810 + | |
1811 + tmp_a_r = buf[(i + 1)].real; | |
1812 + tmp_a_i = -1.0 * buf[(i + 1)].imag; | |
1813 + buf[(i + 1)].real = | |
1814 + (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]); | |
1815 + buf[(i + 1)].imag = | |
1816 + (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]); | |
1817 + | |
1818 + tmp_a_r = buf[(i + 2)].real; | |
1819 + tmp_a_i = -1.0 * buf[(i + 2)].imag; | |
1820 + buf[(i + 2)].real = | |
1821 + (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]); | |
1822 + buf[(i + 2)].imag = | |
1823 + (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]); | |
1824 + | |
1825 + tmp_a_r = buf[(i + 3)].real; | |
1826 + tmp_a_i = -1.0 * buf[(i + 3)].imag; | |
1827 + buf[(i + 3)].real = | |
1828 + (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]); | |
1829 + buf[(i + 3)].imag = | |
1830 + (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]); | |
1831 +#else | |
1832 + vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2; | |
1833 + vector float temp0022, temp1133, tempCS01; | |
1834 + const vector float vczero = (const vector float)FOUROF(0.); | |
1835 + | |
1836 + bufv_0 = vec_ld((i + 0) << 3, (float*)buf); | |
1837 + bufv_2 = vec_ld((i + 2) << 3, (float*)buf); | |
1838 + | |
1839 + cosv = vec_ld(i << 2, xcos1); | |
1840 + sinv = vec_ld(i << 2, xsin1); | |
1841 + | |
1842 + temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2)); | |
1843 + temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3)); | |
1844 + tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1)); | |
1845 + temp1 = vec_madd(temp0022, tempCS01, vczero); | |
1846 + tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); | |
1847 + temp2 = vec_madd(temp1133, tempCS01, vczero); | |
1848 + bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); | |
1849 + | |
1850 + vec_st(bufv_0, (i + 0) << 3, (float*)buf); | |
1851 + | |
1852 + /* idem with bufv_2 and high-order cosv/sinv */ | |
1853 + | |
1854 + temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2)); | |
1855 + temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3)); | |
1856 + tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3)); | |
1857 + temp1 = vec_madd(temp0022, tempCS01, vczero); | |
1858 + tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3)); | |
1859 + temp2 = vec_madd(temp1133, tempCS01, vczero); | |
1860 + bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); | |
1861 + | |
1862 + vec_st(bufv_2, (i + 2) << 3, (float*)buf); | |
1863 + | |
1864 +#endif | |
1865 + } | |
1866 + | |
1867 + data_ptr = data; | |
1868 + delay_ptr = delay; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1869 + window_ptr = a52_imdct_window; |
14990 | 1870 + |
1871 + /* Window and convert to real valued signal */ | |
1872 + for(i=0; i< 64; i++) { | |
1873 + *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; | |
1874 + *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; | |
1875 + } | |
1876 + | |
1877 + for(i=0; i< 64; i++) { | |
1878 + *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; | |
1879 + *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; | |
1880 + } | |
1881 + | |
1882 + /* The trailing edge of the window goes into the delay line */ | |
1883 + delay_ptr = delay; | |
1884 + | |
1885 + for(i=0; i< 64; i++) { | |
1886 + *delay_ptr++ = -buf[64+i].real * *--window_ptr; | |
1887 + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; | |
1888 + } | |
1889 + | |
1890 + for(i=0; i<64; i++) { | |
1891 + *delay_ptr++ = buf[i].imag * *--window_ptr; | |
1892 + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; | |
1893 + } | |
1894 +} | |
1895 +#endif | |
1896 + | |
1897 + | |
1898 +// Stuff below this line is borrowed from libac3 | |
1899 +#include "srfftp.h" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1900 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 1901 +#ifndef HAVE_3DNOW |
1902 +#define HAVE_3DNOW 1 | |
1903 +#endif | |
1904 +#include "srfftp_3dnow.h" | |
1905 + | |
1906 +const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; /* 8-byte aligned pair of 32-bit words; only bit 31 of the second word is set (the IEEE-754 single-precision sign-bit position). Presumably an XOR mask that negates the second float of a pair -- NOTE(review): confirm against its users in the 3DNow! headers included here. */ | |
1907 +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; /* mirror of x_plus_minus_3dnow: bit 31 set in the first word only; presumably negates the first float of a pair -- NOTE(review): confirm against its users. */ | |
1908 +const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; /* both members hold 0.707106781188, i.e. sqrt(2)/2 to 12 decimal places */ | |
1909 + | |
1910 +#undef HAVE_3DNOWEX | |
1911 +#include "imdct_3dnow.h" | |
1912 +#define HAVE_3DNOWEX | |
1913 +#include "imdct_3dnow.h" | |
1914 + | |
1915 +void | |
1916 +imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) | |
1917 +{ | |
1918 +/* int i,k; | |
1919 + int p,q;*/ | |
1920 + int m; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1921 + long two_m; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1922 + long two_m_plus_one; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1923 + long two_m_plus_one_shl3; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1924 + complex_t *buf_offset; |
14990 | 1925 + |
1926 +/* sample_t tmp_a_i; | |
1927 + sample_t tmp_a_r; | |
1928 + sample_t tmp_b_i; | |
1929 + sample_t tmp_b_r;*/ | |
1930 + | |
1931 + sample_t *data_ptr; | |
1932 + sample_t *delay_ptr; | |
1933 + sample_t *window_ptr; | |
1934 + | |
1935 + /* 512 IMDCT with source and dest data in 'data' */ | |
1936 + /* see the C version (dct_do_512()); it is almost identical, just in C */ | |
1937 + | |
1938 + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | |
1939 + /* Bit reversed shuffling */ | |
1940 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1941 + "xor %%"REG_S", %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1942 + "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1943 + "mov $1008, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1944 + "push %%"REG_BP" \n\t" //use ebp without telling gcc |
19373 | 1945 + ASMALIGN(4) |
14990 | 1946 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1947 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1948 + "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1949 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1950 + "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi |
14990 | 1951 + "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1952 + "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t" |
14990 | 1953 + "mulps %%xmm0, %%xmm2 \n\t" |
1954 + "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1955 + "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" |
14990 | 1956 + "subps %%xmm0, %%xmm2 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1957 + "movzb (%%"REG_a"), %%"REG_d" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1958 + "movzb 1(%%"REG_a"), %%"REG_BP" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1959 + "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1960 + "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1961 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1962 + "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1963 + "sub $16, %%"REG_D" \n\t" |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1964 + "jnc 1b \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1965 + "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* |
14990 | 1966 + :: "b" (data), "c" (buf) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
1967 + : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d |
14990 | 1968 + ); |
1969 + | |
1970 + | |
1971 + /* FFT Merge */ | |
1972 +/* unoptimized variant | |
1973 + for (m=1; m < 7; m++) { | |
1974 + if(m) | |
1975 + two_m = (1 << m); | |
1976 + else | |
1977 + two_m = 1; | |
1978 + | |
1979 + two_m_plus_one = (1 << (m+1)); | |
1980 + | |
1981 + for(i = 0; i < 128; i += two_m_plus_one) { | |
1982 + for(k = 0; k < two_m; k++) { | |
1983 + p = k + i; | |
1984 + q = p + two_m; | |
1985 + tmp_a_r = buf[p].real; | |
1986 + tmp_a_i = buf[p].imag; | |
1987 + tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; | |
1988 + tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; | |
1989 + buf[p].real = tmp_a_r + tmp_b_r; | |
1990 + buf[p].imag = tmp_a_i + tmp_b_i; | |
1991 + buf[q].real = tmp_a_r - tmp_b_r; | |
1992 + buf[q].imag = tmp_a_i - tmp_b_i; | |
1993 + } | |
1994 + } | |
1995 + } | |
1996 +*/ | |
1997 + | |
1998 + /* 1. iteration */ | |
1999 + // Note w[0][0]={1,0} | |
2000 + asm volatile( | |
2001 + "xorps %%xmm1, %%xmm1 \n\t" | |
2002 + "xorps %%xmm2, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2003 + "mov %0, %%"REG_S" \n\t" |
19373 | 2004 + ASMALIGN(4) |
14990 | 2005 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2006 + "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2007 + "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2008 + "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2009 + "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] |
14990 | 2010 + "addps %%xmm1, %%xmm0 \n\t" |
2011 + "subps %%xmm2, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2012 + "movaps %%xmm0, (%%"REG_S")\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2013 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2014 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2015 + " jb 1b \n\t" |
2016 + :: "g" (buf), "r" (buf + 128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2017 + : "%"REG_S |
14990 | 2018 + ); |
2019 + | |
2020 + /* 2. iteration */ | |
2021 + // Note w[1]={{1,0}, {0,-1}} | |
2022 + asm volatile( | |
2023 + "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2024 + "mov %0, %%"REG_S" \n\t" |
19373 | 2025 + ASMALIGN(4) |
14990 | 2026 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2027 + "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 |
14990 | 2028 + "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 |
2029 + "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2030 + "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2031 + "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1 |
14990 | 2032 + "addps %%xmm2, %%xmm0 \n\t" |
2033 + "subps %%xmm2, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2034 + "movaps %%xmm0, (%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2035 + "movaps %%xmm1, 16(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2036 + "add $32, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2037 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2038 + " jb 1b \n\t" |
2039 + :: "g" (buf), "r" (buf + 128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2040 + : "%"REG_S |
14990 | 2041 + ); |
2042 + | |
2043 + /* 3. iteration */ | |
2044 +/* | |
2045 + Note sseW2+0={1,1,sqrt(2),sqrt(2)) | |
2046 + Note sseW2+16={0,0,sqrt(2),-sqrt(2)) | |
2045 + Note sseW2+0={1,1,sqrt(2),sqrt(2)} | |
2046 + Note sseW2+16={0,0,sqrt(2),-sqrt(2)} | |
2047 + Note sseW2+32={0,0,-sqrt(2),-sqrt(2)} | |
2048 + Note sseW2+48={1,-1,sqrt(2),-sqrt(2)} | |
2051 + "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" | |
2052 + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" | |
2053 + "xorps %%xmm5, %%xmm5 \n\t" | |
2054 + "xorps %%xmm2, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2055 + "mov %0, %%"REG_S" \n\t" |
19373 | 2056 + ASMALIGN(4) |
14990 | 2057 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2058 + "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2059 + "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 |
14990 | 2060 + "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 |
2061 + "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 | |
2062 + "mulps %%xmm2, %%xmm4 \n\t" | |
2063 + "mulps %%xmm3, %%xmm5 \n\t" | |
2064 + "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 | |
2065 + "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 | |
2066 + "mulps %%xmm6, %%xmm3 \n\t" | |
2067 + "mulps %%xmm7, %%xmm2 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2068 + "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2069 + "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3 |
14990 | 2070 + "addps %%xmm4, %%xmm2 \n\t" |
2071 + "addps %%xmm5, %%xmm3 \n\t" | |
2072 + "movaps %%xmm2, %%xmm4 \n\t" | |
2073 + "movaps %%xmm3, %%xmm5 \n\t" | |
2074 + "addps %%xmm0, %%xmm2 \n\t" | |
2075 + "addps %%xmm1, %%xmm3 \n\t" | |
2076 + "subps %%xmm4, %%xmm0 \n\t" | |
2077 + "subps %%xmm5, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2078 + "movaps %%xmm2, (%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2079 + "movaps %%xmm3, 16(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2080 + "movaps %%xmm0, 32(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2081 + "movaps %%xmm1, 48(%%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2082 + "add $64, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2083 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2084 + " jb 1b \n\t" |
2085 + :: "g" (buf), "r" (buf + 128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2086 + : "%"REG_S |
14990 | 2087 + ); |
2088 + | |
2089 + /* 4-7. iterations */ | |
2090 + for (m=3; m < 7; m++) { | |
2091 + two_m = (1 << m); | |
2092 + two_m_plus_one = two_m<<1; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2093 + two_m_plus_one_shl3 = (two_m_plus_one<<3); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2094 + buf_offset = buf+128; |
14990 | 2095 + asm volatile( |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2096 + "mov %0, %%"REG_S" \n\t" |
19373 | 2097 + ASMALIGN(4) |
14990 | 2098 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2099 + "xor %%"REG_D", %%"REG_D" \n\t" // k |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2100 + "lea (%%"REG_S", %3), %%"REG_d" \n\t" |
14990 | 2101 + "2: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2102 + "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2103 + "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t" |
14990 | 2104 + "mulps %%xmm1, %%xmm2 \n\t" |
2105 + "shufps $0xB1, %%xmm1, %%xmm1 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2106 + "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2107 + "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t" |
14990 | 2108 + "addps %%xmm2, %%xmm1 \n\t" |
2109 + "movaps %%xmm1, %%xmm2 \n\t" | |
2110 + "addps %%xmm0, %%xmm1 \n\t" | |
2111 + "subps %%xmm2, %%xmm0 \n\t" | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2112 + "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2113 + "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2114 + "add $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2115 + "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2116 + "jb 2b \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2117 + "add %2, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2118 + "cmp %1, %%"REG_S" \n\t" |
14990 | 2119 + " jb 1b \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2120 + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), |
14990 | 2121 + "r" (sseW[m]) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2122 + : "%"REG_S, "%"REG_D, "%"REG_d |
14990 | 2123 + ); |
2124 + } | |
2125 + | |
2126 + /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
2127 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2128 + "mov $-1024, %%"REG_S" \n\t" |
19373 | 2129 + ASMALIGN(4) |
14990 | 2130 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2131 + "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2132 + "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
14990 | 2133 + "shufps $0xB1, %%xmm0, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2134 + "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2135 + "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" |
14990 | 2136 + "addps %%xmm1, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2137 + "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2138 + "add $16, %%"REG_S" \n\t" |
14990 | 2139 + " jnz 1b \n\t" |
2140 + :: "r" (buf+128) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2141 + : "%"REG_S |
14990 | 2142 + ); |
2143 + | |
2144 + | |
2145 + data_ptr = data; | |
2146 + delay_ptr = delay; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2147 + window_ptr = a52_imdct_window; |
14990 | 2148 + |
2149 + /* Window and convert to real valued signal */ | |
2150 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2151 + "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2152 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
14990 | 2153 + "movss %3, %%xmm2 \n\t" // bias |
2154 + "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | |
19373 | 2155 + ASMALIGN(4) |
14990 | 2156 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2157 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2158 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2159 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2160 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? |
14990 | 2161 + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2162 + "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2163 + "addps (%2, %%"REG_S"), %%xmm0 \n\t" |
14990 | 2164 + "addps %%xmm2, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2165 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2166 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2167 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2168 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2169 + " jb 1b \n\t" |
2170 + :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2171 + : "%"REG_S, "%"REG_D |
14990 | 2172 + ); |
2173 + data_ptr+=128; | |
2174 + delay_ptr+=128; | |
2175 +// window_ptr+=128; | |
2176 + | |
2177 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2178 + "mov $1024, %%"REG_D" \n\t" // 1024 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2179 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
14990 | 2180 + "movss %3, %%xmm2 \n\t" // bias |
2181 + "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | |
19373 | 2182 + ASMALIGN(4) |
14990 | 2183 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2184 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2185 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2186 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2187 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A |
14990 | 2188 + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2189 + "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2190 + "addps (%2, %%"REG_S"), %%xmm0 \n\t" |
14990 | 2191 + "addps %%xmm2, %%xmm0 \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2192 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2193 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2194 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2195 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2196 + " jb 1b \n\t" |
2197 + :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2198 + : "%"REG_S, "%"REG_D |
14990 | 2199 + ); |
2200 + data_ptr+=128; | |
2201 +// window_ptr+=128; | |
2202 + | |
2203 + /* The trailing edge of the window goes into the delay line */ | |
2204 + delay_ptr = delay; | |
2205 + | |
2206 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2207 + "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2208 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
19373 | 2209 + ASMALIGN(4) |
14990 | 2210 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2211 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2212 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2213 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2214 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A |
14990 | 2215 + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2216 + "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2217 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2218 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2219 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2220 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2221 + " jb 1b \n\t" |
2222 + :: "r" (buf+64), "r" (delay_ptr) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2223 + : "%"REG_S, "%"REG_D |
14990 | 2224 + ); |
2225 + delay_ptr+=128; | |
2226 +// window_ptr-=128; | |
2227 + | |
2228 + asm volatile( | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2229 + "mov $1024, %%"REG_D" \n\t" // 1024 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2230 + "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
19373 | 2231 + ASMALIGN(4) |
14990 | 2232 + "1: \n\t" |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2233 + "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2234 + "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2235 + "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2236 + "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? |
14990 | 2237 + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2238 + "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2239 + "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2240 + "add $16, %%"REG_S" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2241 + "sub $16, %%"REG_D" \n\t" |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2242 + "cmp $512, %%"REG_S" \n\t" |
14990 | 2243 + " jb 1b \n\t" |
2244 + :: "r" (buf), "r" (delay_ptr) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2245 + : "%"REG_S, "%"REG_D |
14990 | 2246 + ); |
2247 +} | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2248 +#endif // ARCH_X86 || ARCH_X86_64 |
14990 | 2249 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2250 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) |
14990 | 2251 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2252 int i, k; |
25995 | 2253 @@ -364,7 +1145,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2254 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2255 void a52_imdct_init (uint32_t mm_accel) |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2256 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2257 - int i, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2258 + int i, j, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2259 double sum; |
14990 | 2260 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2261 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
25995 | 2262 @@ -416,6 +1197,99 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2263 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2264 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2265 } |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2266 + for (i = 0; i < 128; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2267 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2268 + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2269 + } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2270 + for (i = 0; i < 7; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2271 + j = 1 << i; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2272 + for (k = 0; k < j; k++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2273 + w[i][k].real = cos (-M_PI * k / j); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2274 + w[i][k].imag = sin (-M_PI * k / j); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2275 + } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2276 + } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2277 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 2278 + for (i = 0; i < 128; i++) { |
2279 + sseSinCos1c[2*i+0]= xcos1[i]; | |
2280 + sseSinCos1c[2*i+1]= -xcos1[i]; | |
2281 + sseSinCos1d[2*i+0]= xsin1[i]; | |
2282 + sseSinCos1d[2*i+1]= xsin1[i]; | |
2283 + } | |
2284 + for (i = 1; i < 7; i++) { | |
2285 + j = 1 << i; | |
2286 + for (k = 0; k < j; k+=2) { | |
2287 + | |
2288 + sseW[i][4*k + 0] = w[i][k+0].real; | |
2289 + sseW[i][4*k + 1] = w[i][k+0].real; | |
2290 + sseW[i][4*k + 2] = w[i][k+1].real; | |
2291 + sseW[i][4*k + 3] = w[i][k+1].real; | |
2292 + | |
2293 + sseW[i][4*k + 4] = -w[i][k+0].imag; | |
2294 + sseW[i][4*k + 5] = w[i][k+0].imag; | |
2295 + sseW[i][4*k + 6] = -w[i][k+1].imag; | |
2296 + sseW[i][4*k + 7] = w[i][k+1].imag; | |
2297 + | |
2298 + //we multiply more or less uninitialized numbers so we need to use exactly 0.0 | |
2299 + if(k==0) | |
2300 + { | |
2301 +// sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0; | |
2302 + sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0; | |
2303 + } | |
2304 + | |
2305 + if(2*k == j) | |
2306 + { | |
2307 + sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0; | |
2308 +// sseW[i][4*k + 4]= -(sseW[i][4*k + 5]= -1.0); | |
2309 + } | |
2310 + } | |
2311 + } | |
2312 + | |
2313 + for(i=0; i<128; i++) | |
2314 + { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2315 + sseWindow[2*i+0]= -a52_imdct_window[2*i+0]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2316 + sseWindow[2*i+1]= a52_imdct_window[2*i+1]; |
14990 | 2317 + } |
2318 + | |
2319 + for(i=0; i<64; i++) | |
2320 + { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2321 + sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2322 + sseWindow[256 + 2*i+1]= a52_imdct_window[254 - 2*i+0]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2323 + sseWindow[384 + 2*i+0]= a52_imdct_window[126 - 2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2324 + sseWindow[384 + 2*i+1]= -a52_imdct_window[126 - 2*i+0]; |
14990 | 2325 + } |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2326 +#endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2327 + a52_imdct_512 = imdct_do_512; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2328 + ifft128 = ifft128_c; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2329 + ifft64 = ifft64_c; |
14990 | 2330 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2331 +#if defined(ARCH_X86) || defined(ARCH_X86_64) |
14990 | 2332 + if(mm_accel & MM_ACCEL_X86_SSE) |
2333 + { | |
2334 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2335 + a52_imdct_512 = imdct_do_512_sse; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2336 + } |
14990 | 2337 + else |
2338 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) | |
2339 + { | |
2340 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2341 + a52_imdct_512 = imdct_do_512_3dnowex; |
14990 | 2342 + } |
2343 + else | |
2344 + if(mm_accel & MM_ACCEL_X86_3DNOW) | |
2345 + { | |
2346 + fprintf (stderr, "Using 3DNow optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2347 + a52_imdct_512 = imdct_do_512_3dnow; |
14990 | 2348 + } |
2349 + else | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2350 +#endif // ARCH_X86 || ARCH_X86_64 |
14990 | 2351 +#ifdef HAVE_ALTIVEC |
2352 + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) | |
2353 + { | |
2354 + fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2355 + a52_imdct_512 = imdct_do_512_altivec; |
14990 | 2356 + } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2357 + else |
14990 | 2358 +#endif |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2359 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2360 #ifdef LIBA52_DJBFFT |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2361 if (mm_accel & MM_ACCEL_DJBFFT) { |
25995 | 2362 @@ -426,7 +1300,5 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2363 #endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2364 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2365 fprintf (stderr, "No accelerated IMDCT transform found\n"); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2366 - ifft128 = ifft128_c; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2367 - ifft64 = ifft64_c; |
14990 | 2368 } |
2369 } | |
19249 | 2370 --- include/mm_accel.h 2006-06-12 15:05:00.000000000 +0200 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2371 +++ liba52/mm_accel.h 2006-06-05 02:23:04.000000000 +0200 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2372 @@ -30,7 +34,12 @@ |
14990 | 2373 /* x86 accelerations */ |
2374 #define MM_ACCEL_X86_MMX 0x80000000 | |
2375 #define MM_ACCEL_X86_3DNOW 0x40000000 | |
2376 +#define MM_ACCEL_X86_3DNOWEXT 0x08000000 | |
2377 #define MM_ACCEL_X86_MMXEXT 0x20000000 | |
2378 +#define MM_ACCEL_X86_SSE 0x10000000 | |
2379 + | |
2380 +/* PPC accelerations */ | |
2381 +#define MM_ACCEL_PPC_ALTIVEC 0x00010000 | |
2382 | |
2383 uint32_t mm_accel (void); | |
2384 | |
21491 | 2385 --- liba52/parse.c 2006-12-05 08:08:01.000000000 +0100 |
2386 +++ liba52/parse.c 2006-12-05 08:08:44.000000000 +0100 | |
2387 @@ -28,6 +28,7 @@ | |
14990 | 2388 #include "config.h" |
2389 | |
2390 #include <stdlib.h> | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2391 +#include <stdio.h> |
14990 | 2392 #include <string.h> |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2393 #include <inttypes.h> |
14990 | 2394 |
21491 | 2395 @@ -35,13 +36,12 @@ |
14990 | 2396 #include "a52_internal.h" |
2397 #include "bitstream.h" | |
2398 #include "tables.h" | |
2399 +#include "mm_accel.h" | |
21491 | 2400 +#include "libavutil/avutil.h" |
14990 | 2401 |
2402 #ifdef HAVE_MEMALIGN | |
2403 /* some systems have memalign() but no declaration for it */ | |
2404 void * memalign (size_t align, size_t size); | |
2405 -#else | |
2406 -/* assume malloc alignment is sufficient */ | |
2407 -#define memalign(align,size) malloc (size) | |
2408 #endif | |
2409 | |
2410 typedef struct { | |
21491 | 2411 @@ -64,7 +64,16 @@ |
2412 if (state == NULL) | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2413 return NULL; |
14990 | 2414 |
21491 | 2415 +#if defined(__MINGW32__) && defined(HAVE_SSE) |
2416 + state->samples = av_malloc(256 * 12 * sizeof (sample_t)); | |
2417 +#else | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2418 state->samples = memalign (16, 256 * 12 * sizeof (sample_t)); |
14990 | 2419 +#endif |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2420 + if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){ |
14990 | 2421 + mm_accel &=~MM_ACCEL_X86_SSE; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2422 + fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n"); |
14990 | 2423 + } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
2424 + |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2425 if (state->samples == NULL) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2426 free (state); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2427 return NULL; |
21491 | 2428 @@ -78,6 +87,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2429 state->lfsr_state = 1; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2430 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2431 a52_imdct_init (mm_accel); |
14990 | 2432 + downmix_accel_init(mm_accel); |
2433 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2434 return state; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2435 } |
21491 | 2436 @@ -145,7 +155,7 @@ |
14990 | 2437 state->acmod = acmod = buf[6] >> 5; |
2438 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2439 a52_bitstream_set_ptr (state, buf + 6); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2440 - bitstream_get (state, 3); /* skip acmod we already parsed */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2441 + bitstream_skip (state, 3); /* skip acmod we already parsed */ |
14990 | 2442 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2443 if ((acmod == 2) && (bitstream_get (state, 2) == 2)) /* dsurmod */ |
14990 | 2444 acmod = A52_DOLBY; |
21491 | 2445 @@ -176,28 +186,28 @@ |
14990 | 2446 |
2447 chaninfo = !acmod; | |
2448 do { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2449 - bitstream_get (state, 5); /* dialnorm */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2450 + bitstream_skip (state, 5); /* dialnorm */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2451 if (bitstream_get (state, 1)) /* compre */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2452 - bitstream_get (state, 8); /* compr */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2453 + bitstream_skip (state, 8); /* compr */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2454 if (bitstream_get (state, 1)) /* langcode */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2455 - bitstream_get (state, 8); /* langcod */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2456 + bitstream_skip (state, 8); /* langcod */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2457 if (bitstream_get (state, 1)) /* audprodie */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2458 - bitstream_get (state, 7); /* mixlevel + roomtyp */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2459 + bitstream_skip (state, 7); /* mixlevel + roomtyp */ |
14990 | 2460 } while (chaninfo--); |
2461 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2462 - bitstream_get (state, 2); /* copyrightb + origbs */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2463 + bitstream_skip (state, 2); /* copyrightb + origbs */ |
14990 | 2464 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2465 if (bitstream_get (state, 1)) /* timecod1e */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2466 - bitstream_get (state, 14); /* timecod1 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2467 + bitstream_skip (state, 14); /* timecod1 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2468 if (bitstream_get (state, 1)) /* timecod2e */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2469 - bitstream_get (state, 14); /* timecod2 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2470 + bitstream_skip (state, 14); /* timecod2 */ |
14990 | 2471 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2472 if (bitstream_get (state, 1)) { /* addbsie */ |
14990 | 2473 int addbsil; |
2474 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2475 addbsil = bitstream_get (state, 6); |
14990 | 2476 do { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2477 - bitstream_get (state, 8); /* addbsi */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2478 + bitstream_skip (state, 8); /* addbsi */ |
14990 | 2479 } while (addbsil--); |
2480 } | |
2481 | |
21491 | 2482 @@ -684,7 +694,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2483 state->fbw_expbap[i].exp[0], |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2484 state->fbw_expbap[i].exp + 1)) |
14990 | 2485 return 1; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2486 - bitstream_get (state, 2); /* gainrng */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2487 + bitstream_skip (state, 2); /* gainrng */ |
14990 | 2488 } |
2489 if (lfeexpstr != EXP_REUSE) { | |
2490 do_bit_alloc |= 32; | |
21491 | 2491 @@ -759,7 +769,7 @@ |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2492 if (bitstream_get (state, 1)) { /* skiple */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2493 i = bitstream_get (state, 9); /* skipl */ |
14990 | 2494 while (i--) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2495 - bitstream_get (state, 8); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2496 + bitstream_skip (state, 8); |
14990 | 2497 } |
2498 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
14990
diff
changeset
|
2499 samples = state->samples; |
21491 | 2500 @@ -900,6 +910,10 @@ |
2501 | |
2502 void a52_free (a52_state_t * state) | |
2503 { | |
2504 - free (state->samples); | |
2505 +#if defined(__MINGW32__) && defined(HAVE_SSE) | |
2506 + av_free (state->samples); | |
2507 +#else | |
2508 + free (state->samples); | |
2509 +#endif | |
2510 free (state); | |
2511 } |