Mercurial > mplayer.hg
comparison liba52/imdct.c @ 9001:01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
It's nearly bit-perfect, I have a couple of lsb
changed in a 128 frames sample. I can't hear the
differences :-)
patch by Romain Dolbeau <dolbeau@irisa.fr>
author | arpi |
---|---|
date | Sat, 18 Jan 2003 19:28:29 +0000 |
parents | fb88ccbc5ccc |
children | 5ba896a38d75 |
comparison
equal
deleted
inserted
replaced
9000:cc4c2ff1588e | 9001:01a9cf43074c |
---|---|
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 * | 22 * |
23 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | 23 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) |
24 * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru> | 24 * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru> |
25 * michael did port them from libac3 (untested, perhaps totally broken) | 25 * michael did port them from libac3 (untested, perhaps totally broken) |
26 * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) | |
26 */ | 27 */ |
27 | 28 |
28 #include "config.h" | 29 #include "config.h" |
29 | 30 |
30 #include <math.h> | 31 #include <math.h> |
112 static float __attribute__((aligned(16))) sseW6[256]; | 113 static float __attribute__((aligned(16))) sseW6[256]; |
113 static float __attribute__((aligned(16))) *sseW[7]= | 114 static float __attribute__((aligned(16))) *sseW[7]= |
114 {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; | 115 {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6}; |
115 static float __attribute__((aligned(16))) sseWindow[512]; | 116 static float __attribute__((aligned(16))) sseWindow[512]; |
116 #else | 117 #else |
117 static complex_t buf[128]; | 118 static complex_t __attribute__((aligned(16))) buf[128]; |
118 #endif | 119 #endif |
119 | 120 |
120 /* Twiddle factor LUT */ | 121 /* Twiddle factor LUT */ |
121 static complex_t w_1[1]; | 122 static complex_t __attribute__((aligned(16))) w_1[1]; |
122 static complex_t w_2[2]; | 123 static complex_t __attribute__((aligned(16))) w_2[2]; |
123 static complex_t w_4[4]; | 124 static complex_t __attribute__((aligned(16))) w_4[4]; |
124 static complex_t w_8[8]; | 125 static complex_t __attribute__((aligned(16))) w_8[8]; |
125 static complex_t w_16[16]; | 126 static complex_t __attribute__((aligned(16))) w_16[16]; |
126 static complex_t w_32[32]; | 127 static complex_t __attribute__((aligned(16))) w_32[32]; |
127 static complex_t w_64[64]; | 128 static complex_t __attribute__((aligned(16))) w_64[64]; |
128 static complex_t * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64}; | 129 static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64}; |
129 | 130 |
130 /* Twiddle factors for IMDCT */ | 131 /* Twiddle factors for IMDCT */ |
131 static sample_t xcos1[128]; | 132 static sample_t __attribute__((aligned(16))) xcos1[128]; |
132 static sample_t xsin1[128]; | 133 static sample_t __attribute__((aligned(16))) xsin1[128]; |
133 static sample_t xcos2[64]; | 134 static sample_t __attribute__((aligned(16))) xcos2[64]; |
134 static sample_t xsin2[64]; | 135 static sample_t __attribute__((aligned(16))) xsin2[64]; |
135 | 136 |
136 /* Windowing function for Modified DCT - Thank you acroread */ | 137 /* Windowing function for Modified DCT - Thank you acroread */ |
137 sample_t imdct_window[] = { | 138 sample_t imdct_window[] = { |
138 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, | 139 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, |
139 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, | 140 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, |
381 for(i=0; i<64; i++) { | 382 for(i=0; i<64; i++) { |
382 *delay_ptr++ = buf[i].imag * *--window_ptr; | 383 *delay_ptr++ = buf[i].imag * *--window_ptr; |
383 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; | 384 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; |
384 } | 385 } |
385 } | 386 } |
387 | |
388 #ifdef HAVE_ALTIVEC | |
389 | |
390 // used to build registers permutation vectors (vcprm) | |
391 // the 's' are for words in the _s_econd vector | |
392 #define WORD_0 0x00,0x01,0x02,0x03 | |
393 #define WORD_1 0x04,0x05,0x06,0x07 | |
394 #define WORD_2 0x08,0x09,0x0a,0x0b | |
395 #define WORD_3 0x0c,0x0d,0x0e,0x0f | |
396 #define WORD_s0 0x10,0x11,0x12,0x13 | |
397 #define WORD_s1 0x14,0x15,0x16,0x17 | |
398 #define WORD_s2 0x18,0x19,0x1a,0x1b | |
399 #define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
400 | |
401 #define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) | |
402 | |
403 // vcprmle is used to keep the same index as in the SSE version. | |
404 // it's the same as vcprm, with the index inversed | |
405 // ('le' is Little Endian) | |
406 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
407 | |
408 // used to build inverse/identity vectors (vcii) | |
409 // n is _n_egative, p is _p_ositive | |
410 #define FLOAT_n -1. | |
411 #define FLOAT_p 1. | |
412 | |
413 #define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) | |
414 | |
415 void | |
416 imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) | |
417 { | |
418 int i; | |
419 int k; | |
420 int p,q; | |
421 int m; | |
422 int two_m; | |
423 int two_m_plus_one; | |
424 | |
425 sample_t tmp_b_i; | |
426 sample_t tmp_b_r; | |
427 sample_t tmp_a_i; | |
428 sample_t tmp_a_r; | |
429 | |
430 sample_t *data_ptr; | |
431 sample_t *delay_ptr; | |
432 sample_t *window_ptr; | |
433 | |
434 /* 512 IMDCT with source and dest data in 'data' */ | |
435 | |
436 /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ | |
437 for( i=0; i < 128; i++) { | |
438 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ | |
439 int j= bit_reverse_512[i]; | |
440 buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); | |
441 buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); | |
442 } | |
443 | |
444 /* 1. iteration */ | |
445 for(i = 0; i < 128; i += 2) { | |
446 #if 0 | |
447 tmp_a_r = buf[i].real; | |
448 tmp_a_i = buf[i].imag; | |
449 tmp_b_r = buf[i+1].real; | |
450 tmp_b_i = buf[i+1].imag; | |
451 buf[i].real = tmp_a_r + tmp_b_r; | |
452 buf[i].imag = tmp_a_i + tmp_b_i; | |
453 buf[i+1].real = tmp_a_r - tmp_b_r; | |
454 buf[i+1].imag = tmp_a_i - tmp_b_i; | |
455 #else | |
456 vector float temp, bufv; | |
457 | |
458 bufv = vec_ld(i << 3, (float*)buf); | |
459 temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); | |
460 bufv = vec_madd(bufv, vcii(p,p,n,n), temp); | |
461 vec_st(bufv, i << 3, (float*)buf); | |
462 #endif | |
463 } | |
464 | |
465 /* 2. iteration */ | |
466 // Note w[1]={{1,0}, {0,-1}} | |
467 for(i = 0; i < 128; i += 4) { | |
468 #if 0 | |
469 tmp_a_r = buf[i].real; | |
470 tmp_a_i = buf[i].imag; | |
471 tmp_b_r = buf[i+2].real; | |
472 tmp_b_i = buf[i+2].imag; | |
473 buf[i].real = tmp_a_r + tmp_b_r; | |
474 buf[i].imag = tmp_a_i + tmp_b_i; | |
475 buf[i+2].real = tmp_a_r - tmp_b_r; | |
476 buf[i+2].imag = tmp_a_i - tmp_b_i; | |
477 tmp_a_r = buf[i+1].real; | |
478 tmp_a_i = buf[i+1].imag; | |
479 /* WARNING: im <-> re here ! */ | |
480 tmp_b_r = buf[i+3].imag; | |
481 tmp_b_i = buf[i+3].real; | |
482 buf[i+1].real = tmp_a_r + tmp_b_r; | |
483 buf[i+1].imag = tmp_a_i - tmp_b_i; | |
484 buf[i+3].real = tmp_a_r - tmp_b_r; | |
485 buf[i+3].imag = tmp_a_i + tmp_b_i; | |
486 #else | |
487 vector float buf01, buf23, temp1, temp2; | |
488 | |
489 buf01 = vec_ld((i + 0) << 3, (float*)buf); | |
490 buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
491 buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); | |
492 | |
493 temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01); | |
494 temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01); | |
495 | |
496 vec_st(temp1, (i + 0) << 3, (float*)buf); | |
497 vec_st(temp2, (i + 2) << 3, (float*)buf); | |
498 #endif | |
499 } | |
500 | |
501 /* 3. iteration */ | |
502 for(i = 0; i < 128; i += 8) { | |
503 #if 0 | |
504 tmp_a_r = buf[i].real; | |
505 tmp_a_i = buf[i].imag; | |
506 tmp_b_r = buf[i+4].real; | |
507 tmp_b_i = buf[i+4].imag; | |
508 buf[i].real = tmp_a_r + tmp_b_r; | |
509 buf[i].imag = tmp_a_i + tmp_b_i; | |
510 buf[i+4].real = tmp_a_r - tmp_b_r; | |
511 buf[i+4].imag = tmp_a_i - tmp_b_i; | |
512 tmp_a_r = buf[1+i].real; | |
513 tmp_a_i = buf[1+i].imag; | |
514 tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; | |
515 tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; | |
516 buf[1+i].real = tmp_a_r + tmp_b_r; | |
517 buf[1+i].imag = tmp_a_i + tmp_b_i; | |
518 buf[i+5].real = tmp_a_r - tmp_b_r; | |
519 buf[i+5].imag = tmp_a_i - tmp_b_i; | |
520 tmp_a_r = buf[i+2].real; | |
521 tmp_a_i = buf[i+2].imag; | |
522 /* WARNING re <-> im & sign */ | |
523 tmp_b_r = buf[i+6].imag; | |
524 tmp_b_i = - buf[i+6].real; | |
525 buf[i+2].real = tmp_a_r + tmp_b_r; | |
526 buf[i+2].imag = tmp_a_i + tmp_b_i; | |
527 buf[i+6].real = tmp_a_r - tmp_b_r; | |
528 buf[i+6].imag = tmp_a_i - tmp_b_i; | |
529 tmp_a_r = buf[i+3].real; | |
530 tmp_a_i = buf[i+3].imag; | |
531 tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; | |
532 tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; | |
533 buf[i+3].real = tmp_a_r + tmp_b_r; | |
534 buf[i+3].imag = tmp_a_i + tmp_b_i; | |
535 buf[i+7].real = tmp_a_r - tmp_b_r; | |
536 buf[i+7].imag = tmp_a_i - tmp_b_i; | |
537 #else | |
538 vector float buf01, buf23, buf45, buf67; | |
539 | |
540 buf01 = vec_ld((i + 0) << 3, (float*)buf); | |
541 buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
542 | |
543 tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; | |
544 tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; | |
545 buf[i+5].real = tmp_b_r; | |
546 buf[i+5].imag = tmp_b_i; | |
547 tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; | |
548 tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; | |
549 buf[i+7].real = tmp_b_r; | |
550 buf[i+7].imag = tmp_b_i; | |
551 | |
552 buf23 = vec_ld((i + 2) << 3, (float*)buf); | |
553 buf45 = vec_ld((i + 4) << 3, (float*)buf); | |
554 buf67 = vec_ld((i + 6) << 3, (float*)buf); | |
555 buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); | |
556 | |
557 vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); | |
558 vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); | |
559 vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); | |
560 vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); | |
561 #endif | |
562 } | |
563 | |
564 /* 4-7. iterations */ | |
565 for (m=3; m < 7; m++) { | |
566 two_m = (1 << m); | |
567 | |
568 two_m_plus_one = two_m<<1; | |
569 | |
570 for(i = 0; i < 128; i += two_m_plus_one) { | |
571 for(k = 0; k < two_m; k+=2) { | |
572 #if 0 | |
573 int p = k + i; | |
574 int q = p + two_m; | |
575 tmp_a_r = buf[p].real; | |
576 tmp_a_i = buf[p].imag; | |
577 tmp_b_r = | |
578 buf[q].real * w[m][k].real - | |
579 buf[q].imag * w[m][k].imag; | |
580 tmp_b_i = | |
581 buf[q].imag * w[m][k].real + | |
582 buf[q].real * w[m][k].imag; | |
583 buf[p].real = tmp_a_r + tmp_b_r; | |
584 buf[p].imag = tmp_a_i + tmp_b_i; | |
585 buf[q].real = tmp_a_r - tmp_b_r; | |
586 buf[q].imag = tmp_a_i - tmp_b_i; | |
587 | |
588 tmp_a_r = buf[(p + 1)].real; | |
589 tmp_a_i = buf[(p + 1)].imag; | |
590 tmp_b_r = | |
591 buf[(q + 1)].real * w[m][(k + 1)].real - | |
592 buf[(q + 1)].imag * w[m][(k + 1)].imag; | |
593 tmp_b_i = | |
594 buf[(q + 1)].imag * w[m][(k + 1)].real + | |
595 buf[(q + 1)].real * w[m][(k + 1)].imag; | |
596 buf[(p + 1)].real = tmp_a_r + tmp_b_r; | |
597 buf[(p + 1)].imag = tmp_a_i + tmp_b_i; | |
598 buf[(q + 1)].real = tmp_a_r - tmp_b_r; | |
599 buf[(q + 1)].imag = tmp_a_i - tmp_b_i; | |
600 #else | |
601 int p = k + i; | |
602 int q = p + two_m; | |
603 vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4; | |
604 const vector float vczero = (const vector float)(0); | |
605 // first compute buf[q] and buf[q+1] | |
606 vecq = vec_ld(q << 3, (float*)buf); | |
607 vecw = vec_ld(0, (float*)&(w[m][k])); | |
608 temp1 = vec_madd(vecq, vecw, vczero); | |
609 temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2)); | |
610 temp2 = vec_madd(temp2, vecw, vczero); | |
611 temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2)); | |
612 temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3)); | |
613 vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); | |
614 // then butterfly with buf[p] and buf[p+1] | |
615 vecp = vec_ld(p << 3, (float*)buf); | |
616 | |
617 temp1 = vec_add(vecp, vecq); | |
618 temp2 = vec_sub(vecp, vecq); | |
619 | |
620 vec_st(temp1, p << 3, (float*)buf); | |
621 vec_st(temp2, q << 3, (float*)buf); | |
622 #endif | |
623 } | |
624 } | |
625 } | |
626 | |
627 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
628 for( i=0; i < 128; i+=4) { | |
629 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | |
630 #if 0 | |
631 tmp_a_r = buf[(i + 0)].real; | |
632 tmp_a_i = -1.0 * buf[(i + 0)].imag; | |
633 buf[(i + 0)].real = | |
634 (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]); | |
635 buf[(i + 0)].imag = | |
636 (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]); | |
637 | |
638 tmp_a_r = buf[(i + 1)].real; | |
639 tmp_a_i = -1.0 * buf[(i + 1)].imag; | |
640 buf[(i + 1)].real = | |
641 (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]); | |
642 buf[(i + 1)].imag = | |
643 (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]); | |
644 | |
645 tmp_a_r = buf[(i + 2)].real; | |
646 tmp_a_i = -1.0 * buf[(i + 2)].imag; | |
647 buf[(i + 2)].real = | |
648 (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]); | |
649 buf[(i + 2)].imag = | |
650 (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]); | |
651 | |
652 tmp_a_r = buf[(i + 3)].real; | |
653 tmp_a_i = -1.0 * buf[(i + 3)].imag; | |
654 buf[(i + 3)].real = | |
655 (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]); | |
656 buf[(i + 3)].imag = | |
657 (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]); | |
658 #else | |
659 vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2; | |
660 vector float temp0022, temp1133, tempCS01; | |
661 const vector float vczero = (const vector float)(0); | |
662 | |
663 bufv_0 = vec_ld((i + 0) << 3, (float*)buf); | |
664 bufv_2 = vec_ld((i + 2) << 3, (float*)buf); | |
665 | |
666 cosv = vec_ld(i << 2, xcos1); | |
667 sinv = vec_ld(i << 2, xsin1); | |
668 | |
669 temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2)); | |
670 temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3)); | |
671 tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1)); | |
672 temp1 = vec_madd(temp0022, tempCS01, vczero); | |
673 tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); | |
674 temp2 = vec_madd(temp1133, tempCS01, vczero); | |
675 bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); | |
676 | |
677 vec_st(bufv_0, (i + 0) << 3, (float*)buf); | |
678 | |
679 /* idem with bufv_2 and high-order cosv/sinv */ | |
680 | |
681 temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2)); | |
682 temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3)); | |
683 tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3)); | |
684 temp1 = vec_madd(temp0022, tempCS01, vczero); | |
685 tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3)); | |
686 temp2 = vec_madd(temp1133, tempCS01, vczero); | |
687 bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); | |
688 | |
689 vec_st(bufv_2, (i + 2) << 3, (float*)buf); | |
690 | |
691 #endif | |
692 } | |
693 | |
694 data_ptr = data; | |
695 delay_ptr = delay; | |
696 window_ptr = imdct_window; | |
697 | |
698 /* Window and convert to real valued signal */ | |
699 for(i=0; i< 64; i++) { | |
700 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; | |
701 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; | |
702 } | |
703 | |
704 for(i=0; i< 64; i++) { | |
705 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; | |
706 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; | |
707 } | |
708 | |
709 /* The trailing edge of the window goes into the delay line */ | |
710 delay_ptr = delay; | |
711 | |
712 for(i=0; i< 64; i++) { | |
713 *delay_ptr++ = -buf[64+i].real * *--window_ptr; | |
714 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; | |
715 } | |
716 | |
717 for(i=0; i<64; i++) { | |
718 *delay_ptr++ = buf[i].imag * *--window_ptr; | |
719 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; | |
720 } | |
721 } | |
722 #endif | |
723 | |
386 | 724 |
387 // Stuff below this line is borrowed from libac3 | 725 // Stuff below this line is borrowed from libac3 |
388 #include "srfftp.h" | 726 #include "srfftp.h" |
389 #ifdef ARCH_X86 | 727 #ifdef ARCH_X86 |
390 #ifndef HAVE_3DNOW | 728 #ifndef HAVE_3DNOW |
963 fprintf (stderr, "Using 3DNow optimized IMDCT transform\n"); | 1301 fprintf (stderr, "Using 3DNow optimized IMDCT transform\n"); |
964 imdct_512 = imdct_do_512_3dnow; | 1302 imdct_512 = imdct_do_512_3dnow; |
965 } | 1303 } |
966 else | 1304 else |
967 #endif // arch_x86 | 1305 #endif // arch_x86 |
1306 #ifdef HAVE_ALTIVEC | |
1307 if (mm_accel & MM_ACCEL_PPC_ALTIVEC) | |
1308 { | |
1309 fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); | |
1310 imdct_512 = imdct_do_512_altivec; | |
1311 } | |
1312 else | |
1313 #endif | |
968 fprintf (stderr, "No accelerated IMDCT transform found\n"); | 1314 fprintf (stderr, "No accelerated IMDCT transform found\n"); |
969 imdct_256 = imdct_do_256; | 1315 imdct_256 = imdct_do_256; |
970 } | 1316 } |
971 } | 1317 } |
972 | 1318 |