Mercurial > mplayer.hg
comparison liba52/liba52_changes.diff @ 25995:236ab58453f7
Refactor AltiVec macros as done for FFmpeg.
author | diego |
---|---|
date | Mon, 18 Feb 2008 23:32:17 +0000 |
parents | 6f0309e575e0 |
children | 2aadf9302854 |
comparison
equal
deleted
inserted
replaced
25994:32e21d1beb48 | 25995:236ab58453f7 |
---|---|
1426 + } | 1426 + } |
1427 + __asm __volatile("femms":::"memory"); | 1427 + __asm __volatile("femms":::"memory"); |
1428 +} | 1428 +} |
1429 + | 1429 + |
1430 +#endif // ARCH_X86 || ARCH_X86_64 | 1430 +#endif // ARCH_X86 || ARCH_X86_64 |
1431 --- liba52/imdct.c 2006-06-12 15:18:27.000000000 +0200 | 1431 --- liba52/imdct.c 2008-02-19 00:18:33.000000000 +0100 |
1432 +++ liba52/imdct.c 2006-06-12 19:18:39.000000000 +0200 | 1432 +++ liba52/imdct.c 2008-02-19 00:16:40.000000000 +0100 |
1433 @@ -26,6 +26,11 @@ | 1433 @@ -22,6 +26,11 @@ |
1434 * You should have received a copy of the GNU General Public License | 1434 * You should have received a copy of the GNU General Public License |
1435 * along with this program; if not, write to the Free Software | 1435 * along with this program; if not, write to the Free Software |
1436 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 1436 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
1437 + * | 1437 + * |
1438 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | 1438 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) |
1540 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | 1540 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) |
1541 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) | 1541 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) |
1542 { | 1542 { |
1543 int i, k; | 1543 int i, k; |
1544 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; | 1544 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; |
1545 @@ -285,6 +365,714 @@ | 1545 @@ -285,6 +365,707 @@ |
1546 } | 1546 } |
1547 } | 1547 } |
1548 | 1548 |
1549 +#ifdef HAVE_ALTIVEC | 1549 +#ifdef HAVE_ALTIVEC |
1550 + | 1550 + |
1562 +#define WORD_s1 0x14,0x15,0x16,0x17 | 1562 +#define WORD_s1 0x14,0x15,0x16,0x17 |
1563 +#define WORD_s2 0x18,0x19,0x1a,0x1b | 1563 +#define WORD_s2 0x18,0x19,0x1a,0x1b |
1564 +#define WORD_s3 0x1c,0x1d,0x1e,0x1f | 1564 +#define WORD_s3 0x1c,0x1d,0x1e,0x1f |
1565 + | 1565 + |
1566 +#ifdef __APPLE_CC__ | 1566 +#ifdef __APPLE_CC__ |
1567 +#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) | 1567 +#define AVV(x...) (x) |
1568 +#else | 1568 +#else |
1569 +#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} | 1569 +#define AVV(x...) {x} |
1570 +#endif | 1570 +#endif |
1571 + | |
1572 +#define vcprm(a,b,c,d) (const vector unsigned char)AVV(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) | |
1573 +#define vcii(a,b,c,d) (const vector float)AVV(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) | |
1574 + | |
1575 +#define FOUROF(a) AVV(a,a,a,a) | |
1571 + | 1576 + |
1572 +// vcprmle is used to keep the same index as in the SSE version. | 1577 +// vcprmle is used to keep the same index as in the SSE version. |
1573 +// it's the same as vcprm, with the index inversed | 1578 +// it's the same as vcprm, with the index inversed |
1574 +// ('le' is Little Endian) | 1579 +// ('le' is Little Endian) |
1575 +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) | 1580 +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) |
1576 + | 1581 + |
1577 +// used to build inverse/identity vectors (vcii) | 1582 +// used to build inverse/identity vectors (vcii) |
1578 +// n is _n_egative, p is _p_ositive | 1583 +// n is _n_egative, p is _p_ositive |
1579 +#define FLOAT_n -1. | 1584 +#define FLOAT_n -1. |
1580 +#define FLOAT_p 1. | 1585 +#define FLOAT_p 1. |
1581 + | |
1582 +#ifdef __APPLE_CC__ | |
1583 +#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) | |
1584 +#else | |
1585 +#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} | |
1586 +#endif | |
1587 + | |
1588 +#ifdef __APPLE_CC__ | |
1589 +#define FOUROF(a) (a) | |
1590 +#else | |
1591 +#define FOUROF(a) {a,a,a,a} | |
1592 +#endif | |
1593 + | 1586 + |
1594 + | 1587 + |
1595 +void | 1588 +void |
1596 +imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) | 1589 +imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) |
1597 +{ | 1590 +{ |
2255 +#endif // ARCH_X86 || ARCH_X86_64 | 2248 +#endif // ARCH_X86 || ARCH_X86_64 |
2256 + | 2249 + |
2257 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) | 2250 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) |
2258 { | 2251 { |
2259 int i, k; | 2252 int i, k; |
2260 @@ -364,7 +1152,7 @@ | 2253 @@ -364,7 +1145,7 @@ |
2261 | 2254 |
2262 void a52_imdct_init (uint32_t mm_accel) | 2255 void a52_imdct_init (uint32_t mm_accel) |
2263 { | 2256 { |
2264 - int i, k; | 2257 - int i, k; |
2265 + int i, j, k; | 2258 + int i, j, k; |
2266 double sum; | 2259 double sum; |
2267 | 2260 |
2268 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | 2261 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
2269 @@ -416,6 +1204,99 @@ | 2262 @@ -416,6 +1197,99 @@ |
2270 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); | 2263 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); |
2271 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); | 2264 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); |
2272 } | 2265 } |
2273 + for (i = 0; i < 128; i++) { | 2266 + for (i = 0; i < 128; i++) { |
2274 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); | 2267 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
2364 + else | 2357 + else |
2365 +#endif | 2358 +#endif |
2366 | 2359 |
2367 #ifdef LIBA52_DJBFFT | 2360 #ifdef LIBA52_DJBFFT |
2368 if (mm_accel & MM_ACCEL_DJBFFT) { | 2361 if (mm_accel & MM_ACCEL_DJBFFT) { |
2369 @@ -426,7 +1307,5 @@ | 2362 @@ -426,7 +1300,5 @@ |
2370 #endif | 2363 #endif |
2371 { | 2364 { |
2372 fprintf (stderr, "No accelerated IMDCT transform found\n"); | 2365 fprintf (stderr, "No accelerated IMDCT transform found\n"); |
2373 - ifft128 = ifft128_c; | 2366 - ifft128 = ifft128_c; |
2374 - ifft64 = ifft64_c; | 2367 - ifft64 = ifft64_c; |