comparison liba52/liba52_changes.diff @ 25995:236ab58453f7

Refactor AltiVec macros as done for FFmpeg.
author diego
date Mon, 18 Feb 2008 23:32:17 +0000
parents 6f0309e575e0
children 2aadf9302854
comparison
equal deleted inserted replaced
25994:32e21d1beb48 25995:236ab58453f7
1426 + } 1426 + }
1427 + __asm __volatile("femms":::"memory"); 1427 + __asm __volatile("femms":::"memory");
1428 +} 1428 +}
1429 + 1429 +
1430 +#endif // ARCH_X86 || ARCH_X86_64 1430 +#endif // ARCH_X86 || ARCH_X86_64
1431 --- liba52/imdct.c 2006-06-12 15:18:27.000000000 +0200 1431 --- liba52/imdct.c 2008-02-19 00:18:33.000000000 +0100
1432 +++ liba52/imdct.c 2006-06-12 19:18:39.000000000 +0200 1432 +++ liba52/imdct.c 2008-02-19 00:16:40.000000000 +0100
1433 @@ -26,6 +26,11 @@ 1433 @@ -22,6 +26,11 @@
1434 * You should have received a copy of the GNU General Public License 1434 * You should have received a copy of the GNU General Public License
1435 * along with this program; if not, write to the Free Software 1435 * along with this program; if not, write to the Free Software
1436 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 1436 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1437 + * 1437 + *
1438 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) 1438 + * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
1540 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) 1540 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
1541 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) 1541 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
1542 { 1542 {
1543 int i, k; 1543 int i, k;
1544 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; 1544 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
1545 @@ -285,6 +365,714 @@ 1545 @@ -285,6 +365,707 @@
1546 } 1546 }
1547 } 1547 }
1548 1548
1549 +#ifdef HAVE_ALTIVEC 1549 +#ifdef HAVE_ALTIVEC
1550 + 1550 +
1562 +#define WORD_s1 0x14,0x15,0x16,0x17 1562 +#define WORD_s1 0x14,0x15,0x16,0x17
1563 +#define WORD_s2 0x18,0x19,0x1a,0x1b 1563 +#define WORD_s2 0x18,0x19,0x1a,0x1b
1564 +#define WORD_s3 0x1c,0x1d,0x1e,0x1f 1564 +#define WORD_s3 0x1c,0x1d,0x1e,0x1f
1565 + 1565 +
1566 +#ifdef __APPLE_CC__ 1566 +#ifdef __APPLE_CC__
1567 +#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) 1567 +#define AVV(x...) (x)
1568 +#else 1568 +#else
1569 +#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} 1569 +#define AVV(x...) {x}
1570 +#endif 1570 +#endif
1571 +
1572 +#define vcprm(a,b,c,d) (const vector unsigned char)AVV(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
1573 +#define vcii(a,b,c,d) (const vector float)AVV(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
1574 +
1575 +#define FOUROF(a) AVV(a,a,a,a)
1571 + 1576 +
1572 +// vcprmle is used to keep the same index as in the SSE version. 1577 +// vcprmle is used to keep the same index as in the SSE version.
1573 +// it's the same as vcprm, with the index inversed 1578 +// it's the same as vcprm, with the index inversed
1574 +// ('le' is Little Endian) 1579 +// ('le' is Little Endian)
1575 +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) 1580 +#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
1576 + 1581 +
1577 +// used to build inverse/identity vectors (vcii) 1582 +// used to build inverse/identity vectors (vcii)
1578 +// n is _n_egative, p is _p_ositive 1583 +// n is _n_egative, p is _p_ositive
1579 +#define FLOAT_n -1. 1584 +#define FLOAT_n -1.
1580 +#define FLOAT_p 1. 1585 +#define FLOAT_p 1.
1581 +
1582 +#ifdef __APPLE_CC__
1583 +#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
1584 +#else
1585 +#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
1586 +#endif
1587 +
1588 +#ifdef __APPLE_CC__
1589 +#define FOUROF(a) (a)
1590 +#else
1591 +#define FOUROF(a) {a,a,a,a}
1592 +#endif
1593 + 1586 +
1594 + 1587 +
1595 +void 1588 +void
1596 +imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) 1589 +imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
1597 +{ 1590 +{
2255 +#endif // ARCH_X86 || ARCH_X86_64 2248 +#endif // ARCH_X86 || ARCH_X86_64
2256 + 2249 +
2257 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) 2250 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
2258 { 2251 {
2259 int i, k; 2252 int i, k;
2260 @@ -364,7 +1152,7 @@ 2253 @@ -364,7 +1145,7 @@
2261 2254
2262 void a52_imdct_init (uint32_t mm_accel) 2255 void a52_imdct_init (uint32_t mm_accel)
2263 { 2256 {
2264 - int i, k; 2257 - int i, k;
2265 + int i, j, k; 2258 + int i, j, k;
2266 double sum; 2259 double sum;
2267 2260
2268 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ 2261 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
2269 @@ -416,6 +1204,99 @@ 2262 @@ -416,6 +1197,99 @@
2270 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); 2263 post2[i].real = cos ((M_PI / 128) * (i + 0.5));
2271 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); 2264 post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
2272 } 2265 }
2273 + for (i = 0; i < 128; i++) { 2266 + for (i = 0; i < 128; i++) {
2274 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); 2267 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
2364 + else 2357 + else
2365 +#endif 2358 +#endif
2366 2359
2367 #ifdef LIBA52_DJBFFT 2360 #ifdef LIBA52_DJBFFT
2368 if (mm_accel & MM_ACCEL_DJBFFT) { 2361 if (mm_accel & MM_ACCEL_DJBFFT) {
2369 @@ -426,7 +1307,5 @@ 2362 @@ -426,7 +1300,5 @@
2370 #endif 2363 #endif
2371 { 2364 {
2372 fprintf (stderr, "No accelerated IMDCT transform found\n"); 2365 fprintf (stderr, "No accelerated IMDCT transform found\n");
2373 - ifft128 = ifft128_c; 2366 - ifft128 = ifft128_c;
2374 - ifft64 = ifft64_c; 2367 - ifft64 = ifft64_c;