comparison mpegaudiodec.c @ 3600:949219039897 libavcodec

replace MULL by MULH + shift in dct32() (code is very slightly faster, and the compiler should be able to optimize the shift away on 64bit archs)
author michael
date Tue, 22 Aug 2006 06:24:59 +0000
parents 9a069376b7e3
children 99a352dc1601
comparison
equal deleted inserted replaced
3599:9a069376b7e3 3600:949219039897
511 511
512 /* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*i+1) / 2^(6 - j))) */ 512 /* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*i+1) / 2^(6 - j))) */
513 513
514 /* cos(i*pi/64) */ 514 /* cos(i*pi/64) */
515 515
516 #define COS0_0 FIXR(0.50060299823519630134) 516 #define COS0_0 FIXHR(0.50060299823519630134/2)
517 #define COS0_1 FIXR(0.50547095989754365998) 517 #define COS0_1 FIXHR(0.50547095989754365998/2)
518 #define COS0_2 FIXR(0.51544730992262454697) 518 #define COS0_2 FIXHR(0.51544730992262454697/2)
519 #define COS0_3 FIXR(0.53104259108978417447) 519 #define COS0_3 FIXHR(0.53104259108978417447/2)
520 #define COS0_4 FIXR(0.55310389603444452782) 520 #define COS0_4 FIXHR(0.55310389603444452782/2)
521 #define COS0_5 FIXR(0.58293496820613387367) 521 #define COS0_5 FIXHR(0.58293496820613387367/2)
522 #define COS0_6 FIXR(0.62250412303566481615) 522 #define COS0_6 FIXHR(0.62250412303566481615/2)
523 #define COS0_7 FIXR(0.67480834145500574602) 523 #define COS0_7 FIXHR(0.67480834145500574602/2)
524 #define COS0_8 FIXR(0.74453627100229844977) 524 #define COS0_8 FIXHR(0.74453627100229844977/2)
525 #define COS0_9 FIXR(0.83934964541552703873) 525 #define COS0_9 FIXHR(0.83934964541552703873/2)
526 #define COS0_10 FIXR(0.97256823786196069369) 526 #define COS0_10 FIXHR(0.97256823786196069369/2)
527 #define COS0_11 FIXR(1.16943993343288495515) 527 #define COS0_11 FIXHR(1.16943993343288495515/4)
528 #define COS0_12 FIXR(1.48416461631416627724) 528 #define COS0_12 FIXHR(1.48416461631416627724/4)
529 #define COS0_13 FIXR(2.05778100995341155085) 529 #define COS0_13 FIXHR(2.05778100995341155085/8)
530 #define COS0_14 FIXR(3.40760841846871878570) 530 #define COS0_14 FIXHR(3.40760841846871878570/8)
531 #define COS0_15 FIXR(10.19000812354805681150) 531 #define COS0_15 FIXHR(10.19000812354805681150/32)
532 532
533 #define COS1_0 FIXR(0.50241928618815570551) 533 #define COS1_0 FIXHR(0.50241928618815570551/2)
534 #define COS1_1 FIXR(0.52249861493968888062) 534 #define COS1_1 FIXHR(0.52249861493968888062/2)
535 #define COS1_2 FIXR(0.56694403481635770368) 535 #define COS1_2 FIXHR(0.56694403481635770368/2)
536 #define COS1_3 FIXR(0.64682178335999012954) 536 #define COS1_3 FIXHR(0.64682178335999012954/2)
537 #define COS1_4 FIXR(0.78815462345125022473) 537 #define COS1_4 FIXHR(0.78815462345125022473/2)
538 #define COS1_5 FIXR(1.06067768599034747134) 538 #define COS1_5 FIXHR(1.06067768599034747134/4)
539 #define COS1_6 FIXR(1.72244709823833392782) 539 #define COS1_6 FIXHR(1.72244709823833392782/4)
540 #define COS1_7 FIXR(5.10114861868916385802) 540 #define COS1_7 FIXHR(5.10114861868916385802/16)
541 541
542 #define COS2_0 FIXR(0.50979557910415916894) 542 #define COS2_0 FIXHR(0.50979557910415916894/2)
543 #define COS2_1 FIXR(0.60134488693504528054) 543 #define COS2_1 FIXHR(0.60134488693504528054/2)
544 #define COS2_2 FIXR(0.89997622313641570463) 544 #define COS2_2 FIXHR(0.89997622313641570463/2)
545 #define COS2_3 FIXR(2.56291544774150617881) 545 #define COS2_3 FIXHR(2.56291544774150617881/8)
546 546
547 #define COS3_0 FIXR(0.54119610014619698439) 547 #define COS3_0 FIXHR(0.54119610014619698439/2)
548 #define COS3_1 FIXR(1.30656296487637652785) 548 #define COS3_1 FIXHR(1.30656296487637652785/4)
549 549
550 #define COS4_0 FIXR(0.70710678118654752439) 550 #define COS4_0 FIXHR(0.70710678118654752439/2)
551 551
552 /* butterfly operator */ 552 /* butterfly operator */
553 #define BF(a, b, c)\ 553 #define BF(a, b, c, s)\
554 {\ 554 {\
555 tmp0 = tab[a] + tab[b];\ 555 tmp0 = tab[a] + tab[b];\
556 tmp1 = tab[a] - tab[b];\ 556 tmp1 = tab[a] - tab[b];\
557 tab[a] = tmp0;\ 557 tab[a] = tmp0;\
558 tab[b] = MULL(tmp1, c);\ 558 tab[b] = MULH(tmp1<<(s), c);\
559 } 559 }
560 560
561 #define BF1(a, b, c, d)\ 561 #define BF1(a, b, c, d)\
562 {\ 562 {\
563 BF(a, b, COS4_0);\ 563 BF(a, b, COS4_0, 1);\
564 BF(c, d, -COS4_0);\ 564 BF(c, d,-COS4_0, 1);\
565 tab[c] += tab[d];\ 565 tab[c] += tab[d];\
566 } 566 }
567 567
568 #define BF2(a, b, c, d)\ 568 #define BF2(a, b, c, d)\
569 {\ 569 {\
570 BF(a, b, COS4_0);\ 570 BF(a, b, COS4_0, 1);\
571 BF(c, d, -COS4_0);\ 571 BF(c, d,-COS4_0, 1);\
572 tab[c] += tab[d];\ 572 tab[c] += tab[d];\
573 tab[a] += tab[c];\ 573 tab[a] += tab[c];\
574 tab[c] += tab[b];\ 574 tab[c] += tab[b];\
575 tab[b] += tab[d];\ 575 tab[b] += tab[d];\
576 } 576 }
580 /* DCT32 without 1/sqrt(2) coef zero scaling. */ 580 /* DCT32 without 1/sqrt(2) coef zero scaling. */
581 static void dct32(int32_t *out, int32_t *tab) 581 static void dct32(int32_t *out, int32_t *tab)
582 { 582 {
583 int tmp0, tmp1; 583 int tmp0, tmp1;
584 584
585
586 /* pass 1 */ 585 /* pass 1 */
587 BF(0, 31, COS0_0); 586 BF( 0, 31, COS0_0 , 1);
588 BF(15, 16, COS0_15); 587 BF(15, 16, COS0_15, 5);
589 /* pass 2 */ 588 /* pass 2 */
590 BF(0, 15, COS1_0); 589 BF( 0, 15, COS1_0 , 1);
591 BF(16, 31, -COS1_0); 590 BF(16, 31,-COS1_0 , 1);
592 /* pass 1 */ 591 /* pass 1 */
593 BF(7, 24, COS0_7); 592 BF( 7, 24, COS0_7 , 1);
594 BF(8, 23, COS0_8); 593 BF( 8, 23, COS0_8 , 1);
595 /* pass 2 */ 594 /* pass 2 */
596 BF(7, 8, COS1_7); 595 BF( 7, 8, COS1_7 , 4);
597 BF(23, 24, -COS1_7); 596 BF(23, 24,-COS1_7 , 4);
598 /* pass 3 */ 597 /* pass 3 */
599 BF(0, 7, COS2_0); 598 BF( 0, 7, COS2_0 , 1);
600 BF(8, 15, -COS2_0); 599 BF( 8, 15,-COS2_0 , 1);
601 BF(16, 23, COS2_0); 600 BF(16, 23, COS2_0 , 1);
602 BF(24, 31, -COS2_0); 601 BF(24, 31,-COS2_0 , 1);
603 /* pass 1 */ 602 /* pass 1 */
604 BF(3, 28, COS0_3); 603 BF( 3, 28, COS0_3 , 1);
605 BF(12, 19, COS0_12); 604 BF(12, 19, COS0_12, 2);
606 /* pass 2 */ 605 /* pass 2 */
607 BF(3, 12, COS1_3); 606 BF( 3, 12, COS1_3 , 1);
608 BF(19, 28, -COS1_3); 607 BF(19, 28,-COS1_3 , 1);
609 /* pass 1 */ 608 /* pass 1 */
610 BF(4, 27, COS0_4); 609 BF( 4, 27, COS0_4 , 1);
611 BF(11, 20, COS0_11); 610 BF(11, 20, COS0_11, 2);
612 /* pass 2 */ 611 /* pass 2 */
613 BF(4, 11, COS1_4); 612 BF( 4, 11, COS1_4 , 1);
614 BF(20, 27, -COS1_4); 613 BF(20, 27,-COS1_4 , 1);
615 /* pass 3 */ 614 /* pass 3 */
616 BF(3, 4, COS2_3); 615 BF( 3, 4, COS2_3 , 3);
617 BF(11, 12, -COS2_3); 616 BF(11, 12,-COS2_3 , 3);
618 BF(19, 20, COS2_3); 617 BF(19, 20, COS2_3 , 3);
619 BF(27, 28, -COS2_3); 618 BF(27, 28,-COS2_3 , 3);
620 /* pass 4 */ 619 /* pass 4 */
621 BF(0, 3, COS3_0); 620 BF( 0, 3, COS3_0 , 1);
622 BF(4, 7, -COS3_0); 621 BF( 4, 7,-COS3_0 , 1);
623 BF(8, 11, COS3_0); 622 BF( 8, 11, COS3_0 , 1);
624 BF(12, 15, -COS3_0); 623 BF(12, 15,-COS3_0 , 1);
625 BF(16, 19, COS3_0); 624 BF(16, 19, COS3_0 , 1);
626 BF(20, 23, -COS3_0); 625 BF(20, 23,-COS3_0 , 1);
627 BF(24, 27, COS3_0); 626 BF(24, 27, COS3_0 , 1);
628 BF(28, 31, -COS3_0); 627 BF(28, 31,-COS3_0 , 1);
629 628
630 629
631 630
632 /* pass 1 */ 631 /* pass 1 */
633 BF(1, 30, COS0_1); 632 BF( 1, 30, COS0_1 , 1);
634 BF(14, 17, COS0_14); 633 BF(14, 17, COS0_14, 3);
635 /* pass 2 */ 634 /* pass 2 */
636 BF(1, 14, COS1_1); 635 BF( 1, 14, COS1_1 , 1);
637 BF(17, 30, -COS1_1); 636 BF(17, 30,-COS1_1 , 1);
638 /* pass 1 */ 637 /* pass 1 */
639 BF(6, 25, COS0_6); 638 BF( 6, 25, COS0_6 , 1);
640 BF(9, 22, COS0_9); 639 BF( 9, 22, COS0_9 , 1);
641 /* pass 2 */ 640 /* pass 2 */
642 BF(6, 9, COS1_6); 641 BF( 6, 9, COS1_6 , 2);
643 BF(22, 25, -COS1_6); 642 BF(22, 25,-COS1_6 , 2);
644 /* pass 3 */ 643 /* pass 3 */
645 BF(1, 6, COS2_1); 644 BF( 1, 6, COS2_1 , 1);
646 BF(9, 14, -COS2_1); 645 BF( 9, 14,-COS2_1 , 1);
647 BF(17, 22, COS2_1); 646 BF(17, 22, COS2_1 , 1);
648 BF(25, 30, -COS2_1); 647 BF(25, 30,-COS2_1 , 1);
649 648
650 /* pass 1 */ 649 /* pass 1 */
651 BF(2, 29, COS0_2); 650 BF( 2, 29, COS0_2 , 1);
652 BF(13, 18, COS0_13); 651 BF(13, 18, COS0_13, 3);
653 /* pass 2 */ 652 /* pass 2 */
654 BF(2, 13, COS1_2); 653 BF( 2, 13, COS1_2 , 1);
655 BF(18, 29, -COS1_2); 654 BF(18, 29,-COS1_2 , 1);
656 /* pass 1 */ 655 /* pass 1 */
657 BF(5, 26, COS0_5); 656 BF( 5, 26, COS0_5 , 1);
658 BF(10, 21, COS0_10); 657 BF(10, 21, COS0_10, 1);
659 /* pass 2 */ 658 /* pass 2 */
660 BF(5, 10, COS1_5); 659 BF( 5, 10, COS1_5 , 2);
661 BF(21, 26, -COS1_5); 660 BF(21, 26,-COS1_5 , 2);
662 /* pass 3 */ 661 /* pass 3 */
663 BF(2, 5, COS2_2); 662 BF( 2, 5, COS2_2 , 1);
664 BF(10, 13, -COS2_2); 663 BF(10, 13,-COS2_2 , 1);
665 BF(18, 21, COS2_2); 664 BF(18, 21, COS2_2 , 1);
666 BF(26, 29, -COS2_2); 665 BF(26, 29,-COS2_2 , 1);
667 /* pass 4 */ 666 /* pass 4 */
668 BF(1, 2, COS3_1); 667 BF( 1, 2, COS3_1 , 2);
669 BF(5, 6, -COS3_1); 668 BF( 5, 6,-COS3_1 , 2);
670 BF(9, 10, COS3_1); 669 BF( 9, 10, COS3_1 , 2);
671 BF(13, 14, -COS3_1); 670 BF(13, 14,-COS3_1 , 2);
672 BF(17, 18, COS3_1); 671 BF(17, 18, COS3_1 , 2);
673 BF(21, 22, -COS3_1); 672 BF(21, 22,-COS3_1 , 2);
674 BF(25, 26, COS3_1); 673 BF(25, 26, COS3_1 , 2);
675 BF(29, 30, -COS3_1); 674 BF(29, 30,-COS3_1 , 2);
676 675
677 /* pass 5 */ 676 /* pass 5 */
678 BF1(0, 1, 2, 3); 677 BF1( 0, 1, 2, 3);
679 BF2(4, 5, 6, 7); 678 BF2( 4, 5, 6, 7);
680 BF1(8, 9, 10, 11); 679 BF1( 8, 9, 10, 11);
681 BF2(12, 13, 14, 15); 680 BF2(12, 13, 14, 15);
682 BF1(16, 17, 18, 19); 681 BF1(16, 17, 18, 19);
683 BF2(20, 21, 22, 23); 682 BF2(20, 21, 22, 23);
684 BF1(24, 25, 26, 27); 683 BF1(24, 25, 26, 27);
685 BF2(28, 29, 30, 31); 684 BF2(28, 29, 30, 31);