Mercurial > libavcodec.hg
comparison mpegaudiodec.c @ 3600:949219039897 libavcodec
replace MULL by MULH + shift in dct32() (code is very slightly faster, and the compiler should be able to optimize the shift away on 64bit archs)
author | michael |
---|---|
date | Tue, 22 Aug 2006 06:24:59 +0000 |
parents | 9a069376b7e3 |
children | 99a352dc1601 |
comparison
equal
deleted
inserted
replaced
3599:9a069376b7e3 | 3600:949219039897 |
---|---|
511 | 511 |
512 /* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*i+1) / 2^(6 - j))) */ | 512 /* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*i+1) / 2^(6 - j))) */ |
513 | 513 |
514 /* cos(i*pi/64) */ | 514 /* cos(i*pi/64) */ |
515 | 515 |
516 #define COS0_0 FIXR(0.50060299823519630134) | 516 #define COS0_0 FIXHR(0.50060299823519630134/2) |
517 #define COS0_1 FIXR(0.50547095989754365998) | 517 #define COS0_1 FIXHR(0.50547095989754365998/2) |
518 #define COS0_2 FIXR(0.51544730992262454697) | 518 #define COS0_2 FIXHR(0.51544730992262454697/2) |
519 #define COS0_3 FIXR(0.53104259108978417447) | 519 #define COS0_3 FIXHR(0.53104259108978417447/2) |
520 #define COS0_4 FIXR(0.55310389603444452782) | 520 #define COS0_4 FIXHR(0.55310389603444452782/2) |
521 #define COS0_5 FIXR(0.58293496820613387367) | 521 #define COS0_5 FIXHR(0.58293496820613387367/2) |
522 #define COS0_6 FIXR(0.62250412303566481615) | 522 #define COS0_6 FIXHR(0.62250412303566481615/2) |
523 #define COS0_7 FIXR(0.67480834145500574602) | 523 #define COS0_7 FIXHR(0.67480834145500574602/2) |
524 #define COS0_8 FIXR(0.74453627100229844977) | 524 #define COS0_8 FIXHR(0.74453627100229844977/2) |
525 #define COS0_9 FIXR(0.83934964541552703873) | 525 #define COS0_9 FIXHR(0.83934964541552703873/2) |
526 #define COS0_10 FIXR(0.97256823786196069369) | 526 #define COS0_10 FIXHR(0.97256823786196069369/2) |
527 #define COS0_11 FIXR(1.16943993343288495515) | 527 #define COS0_11 FIXHR(1.16943993343288495515/4) |
528 #define COS0_12 FIXR(1.48416461631416627724) | 528 #define COS0_12 FIXHR(1.48416461631416627724/4) |
529 #define COS0_13 FIXR(2.05778100995341155085) | 529 #define COS0_13 FIXHR(2.05778100995341155085/8) |
530 #define COS0_14 FIXR(3.40760841846871878570) | 530 #define COS0_14 FIXHR(3.40760841846871878570/8) |
531 #define COS0_15 FIXR(10.19000812354805681150) | 531 #define COS0_15 FIXHR(10.19000812354805681150/32) |
532 | 532 |
533 #define COS1_0 FIXR(0.50241928618815570551) | 533 #define COS1_0 FIXHR(0.50241928618815570551/2) |
534 #define COS1_1 FIXR(0.52249861493968888062) | 534 #define COS1_1 FIXHR(0.52249861493968888062/2) |
535 #define COS1_2 FIXR(0.56694403481635770368) | 535 #define COS1_2 FIXHR(0.56694403481635770368/2) |
536 #define COS1_3 FIXR(0.64682178335999012954) | 536 #define COS1_3 FIXHR(0.64682178335999012954/2) |
537 #define COS1_4 FIXR(0.78815462345125022473) | 537 #define COS1_4 FIXHR(0.78815462345125022473/2) |
538 #define COS1_5 FIXR(1.06067768599034747134) | 538 #define COS1_5 FIXHR(1.06067768599034747134/4) |
539 #define COS1_6 FIXR(1.72244709823833392782) | 539 #define COS1_6 FIXHR(1.72244709823833392782/4) |
540 #define COS1_7 FIXR(5.10114861868916385802) | 540 #define COS1_7 FIXHR(5.10114861868916385802/16) |
541 | 541 |
542 #define COS2_0 FIXR(0.50979557910415916894) | 542 #define COS2_0 FIXHR(0.50979557910415916894/2) |
543 #define COS2_1 FIXR(0.60134488693504528054) | 543 #define COS2_1 FIXHR(0.60134488693504528054/2) |
544 #define COS2_2 FIXR(0.89997622313641570463) | 544 #define COS2_2 FIXHR(0.89997622313641570463/2) |
545 #define COS2_3 FIXR(2.56291544774150617881) | 545 #define COS2_3 FIXHR(2.56291544774150617881/8) |
546 | 546 |
547 #define COS3_0 FIXR(0.54119610014619698439) | 547 #define COS3_0 FIXHR(0.54119610014619698439/2) |
548 #define COS3_1 FIXR(1.30656296487637652785) | 548 #define COS3_1 FIXHR(1.30656296487637652785/4) |
549 | 549 |
550 #define COS4_0 FIXR(0.70710678118654752439) | 550 #define COS4_0 FIXHR(0.70710678118654752439/2) |
551 | 551 |
552 /* butterfly operator */ | 552 /* butterfly operator */ |
553 #define BF(a, b, c)\ | 553 #define BF(a, b, c, s)\ |
554 {\ | 554 {\ |
555 tmp0 = tab[a] + tab[b];\ | 555 tmp0 = tab[a] + tab[b];\ |
556 tmp1 = tab[a] - tab[b];\ | 556 tmp1 = tab[a] - tab[b];\ |
557 tab[a] = tmp0;\ | 557 tab[a] = tmp0;\ |
558 tab[b] = MULL(tmp1, c);\ | 558 tab[b] = MULH(tmp1<<(s), c);\ |
559 } | 559 } |
560 | 560 |
561 #define BF1(a, b, c, d)\ | 561 #define BF1(a, b, c, d)\ |
562 {\ | 562 {\ |
563 BF(a, b, COS4_0);\ | 563 BF(a, b, COS4_0, 1);\ |
564 BF(c, d, -COS4_0);\ | 564 BF(c, d,-COS4_0, 1);\ |
565 tab[c] += tab[d];\ | 565 tab[c] += tab[d];\ |
566 } | 566 } |
567 | 567 |
568 #define BF2(a, b, c, d)\ | 568 #define BF2(a, b, c, d)\ |
569 {\ | 569 {\ |
570 BF(a, b, COS4_0);\ | 570 BF(a, b, COS4_0, 1);\ |
571 BF(c, d, -COS4_0);\ | 571 BF(c, d,-COS4_0, 1);\ |
572 tab[c] += tab[d];\ | 572 tab[c] += tab[d];\ |
573 tab[a] += tab[c];\ | 573 tab[a] += tab[c];\ |
574 tab[c] += tab[b];\ | 574 tab[c] += tab[b];\ |
575 tab[b] += tab[d];\ | 575 tab[b] += tab[d];\ |
576 } | 576 } |
580 /* DCT32 without 1/sqrt(2) coef zero scaling. */ | 580 /* DCT32 without 1/sqrt(2) coef zero scaling. */ |
581 static void dct32(int32_t *out, int32_t *tab) | 581 static void dct32(int32_t *out, int32_t *tab) |
582 { | 582 { |
583 int tmp0, tmp1; | 583 int tmp0, tmp1; |
584 | 584 |
585 | |
586 /* pass 1 */ | 585 /* pass 1 */ |
587 BF(0, 31, COS0_0); | 586 BF( 0, 31, COS0_0 , 1); |
588 BF(15, 16, COS0_15); | 587 BF(15, 16, COS0_15, 5); |
589 /* pass 2 */ | 588 /* pass 2 */ |
590 BF(0, 15, COS1_0); | 589 BF( 0, 15, COS1_0 , 1); |
591 BF(16, 31, -COS1_0); | 590 BF(16, 31,-COS1_0 , 1); |
592 /* pass 1 */ | 591 /* pass 1 */ |
593 BF(7, 24, COS0_7); | 592 BF( 7, 24, COS0_7 , 1); |
594 BF(8, 23, COS0_8); | 593 BF( 8, 23, COS0_8 , 1); |
595 /* pass 2 */ | 594 /* pass 2 */ |
596 BF(7, 8, COS1_7); | 595 BF( 7, 8, COS1_7 , 4); |
597 BF(23, 24, -COS1_7); | 596 BF(23, 24,-COS1_7 , 4); |
598 /* pass 3 */ | 597 /* pass 3 */ |
599 BF(0, 7, COS2_0); | 598 BF( 0, 7, COS2_0 , 1); |
600 BF(8, 15, -COS2_0); | 599 BF( 8, 15,-COS2_0 , 1); |
601 BF(16, 23, COS2_0); | 600 BF(16, 23, COS2_0 , 1); |
602 BF(24, 31, -COS2_0); | 601 BF(24, 31,-COS2_0 , 1); |
603 /* pass 1 */ | 602 /* pass 1 */ |
604 BF(3, 28, COS0_3); | 603 BF( 3, 28, COS0_3 , 1); |
605 BF(12, 19, COS0_12); | 604 BF(12, 19, COS0_12, 2); |
606 /* pass 2 */ | 605 /* pass 2 */ |
607 BF(3, 12, COS1_3); | 606 BF( 3, 12, COS1_3 , 1); |
608 BF(19, 28, -COS1_3); | 607 BF(19, 28,-COS1_3 , 1); |
609 /* pass 1 */ | 608 /* pass 1 */ |
610 BF(4, 27, COS0_4); | 609 BF( 4, 27, COS0_4 , 1); |
611 BF(11, 20, COS0_11); | 610 BF(11, 20, COS0_11, 2); |
612 /* pass 2 */ | 611 /* pass 2 */ |
613 BF(4, 11, COS1_4); | 612 BF( 4, 11, COS1_4 , 1); |
614 BF(20, 27, -COS1_4); | 613 BF(20, 27,-COS1_4 , 1); |
615 /* pass 3 */ | 614 /* pass 3 */ |
616 BF(3, 4, COS2_3); | 615 BF( 3, 4, COS2_3 , 3); |
617 BF(11, 12, -COS2_3); | 616 BF(11, 12,-COS2_3 , 3); |
618 BF(19, 20, COS2_3); | 617 BF(19, 20, COS2_3 , 3); |
619 BF(27, 28, -COS2_3); | 618 BF(27, 28,-COS2_3 , 3); |
620 /* pass 4 */ | 619 /* pass 4 */ |
621 BF(0, 3, COS3_0); | 620 BF( 0, 3, COS3_0 , 1); |
622 BF(4, 7, -COS3_0); | 621 BF( 4, 7,-COS3_0 , 1); |
623 BF(8, 11, COS3_0); | 622 BF( 8, 11, COS3_0 , 1); |
624 BF(12, 15, -COS3_0); | 623 BF(12, 15,-COS3_0 , 1); |
625 BF(16, 19, COS3_0); | 624 BF(16, 19, COS3_0 , 1); |
626 BF(20, 23, -COS3_0); | 625 BF(20, 23,-COS3_0 , 1); |
627 BF(24, 27, COS3_0); | 626 BF(24, 27, COS3_0 , 1); |
628 BF(28, 31, -COS3_0); | 627 BF(28, 31,-COS3_0 , 1); |
629 | 628 |
630 | 629 |
631 | 630 |
632 /* pass 1 */ | 631 /* pass 1 */ |
633 BF(1, 30, COS0_1); | 632 BF( 1, 30, COS0_1 , 1); |
634 BF(14, 17, COS0_14); | 633 BF(14, 17, COS0_14, 3); |
635 /* pass 2 */ | 634 /* pass 2 */ |
636 BF(1, 14, COS1_1); | 635 BF( 1, 14, COS1_1 , 1); |
637 BF(17, 30, -COS1_1); | 636 BF(17, 30,-COS1_1 , 1); |
638 /* pass 1 */ | 637 /* pass 1 */ |
639 BF(6, 25, COS0_6); | 638 BF( 6, 25, COS0_6 , 1); |
640 BF(9, 22, COS0_9); | 639 BF( 9, 22, COS0_9 , 1); |
641 /* pass 2 */ | 640 /* pass 2 */ |
642 BF(6, 9, COS1_6); | 641 BF( 6, 9, COS1_6 , 2); |
643 BF(22, 25, -COS1_6); | 642 BF(22, 25,-COS1_6 , 2); |
644 /* pass 3 */ | 643 /* pass 3 */ |
645 BF(1, 6, COS2_1); | 644 BF( 1, 6, COS2_1 , 1); |
646 BF(9, 14, -COS2_1); | 645 BF( 9, 14,-COS2_1 , 1); |
647 BF(17, 22, COS2_1); | 646 BF(17, 22, COS2_1 , 1); |
648 BF(25, 30, -COS2_1); | 647 BF(25, 30,-COS2_1 , 1); |
649 | 648 |
650 /* pass 1 */ | 649 /* pass 1 */ |
651 BF(2, 29, COS0_2); | 650 BF( 2, 29, COS0_2 , 1); |
652 BF(13, 18, COS0_13); | 651 BF(13, 18, COS0_13, 3); |
653 /* pass 2 */ | 652 /* pass 2 */ |
654 BF(2, 13, COS1_2); | 653 BF( 2, 13, COS1_2 , 1); |
655 BF(18, 29, -COS1_2); | 654 BF(18, 29,-COS1_2 , 1); |
656 /* pass 1 */ | 655 /* pass 1 */ |
657 BF(5, 26, COS0_5); | 656 BF( 5, 26, COS0_5 , 1); |
658 BF(10, 21, COS0_10); | 657 BF(10, 21, COS0_10, 1); |
659 /* pass 2 */ | 658 /* pass 2 */ |
660 BF(5, 10, COS1_5); | 659 BF( 5, 10, COS1_5 , 2); |
661 BF(21, 26, -COS1_5); | 660 BF(21, 26,-COS1_5 , 2); |
662 /* pass 3 */ | 661 /* pass 3 */ |
663 BF(2, 5, COS2_2); | 662 BF( 2, 5, COS2_2 , 1); |
664 BF(10, 13, -COS2_2); | 663 BF(10, 13,-COS2_2 , 1); |
665 BF(18, 21, COS2_2); | 664 BF(18, 21, COS2_2 , 1); |
666 BF(26, 29, -COS2_2); | 665 BF(26, 29,-COS2_2 , 1); |
667 /* pass 4 */ | 666 /* pass 4 */ |
668 BF(1, 2, COS3_1); | 667 BF( 1, 2, COS3_1 , 2); |
669 BF(5, 6, -COS3_1); | 668 BF( 5, 6,-COS3_1 , 2); |
670 BF(9, 10, COS3_1); | 669 BF( 9, 10, COS3_1 , 2); |
671 BF(13, 14, -COS3_1); | 670 BF(13, 14,-COS3_1 , 2); |
672 BF(17, 18, COS3_1); | 671 BF(17, 18, COS3_1 , 2); |
673 BF(21, 22, -COS3_1); | 672 BF(21, 22,-COS3_1 , 2); |
674 BF(25, 26, COS3_1); | 673 BF(25, 26, COS3_1 , 2); |
675 BF(29, 30, -COS3_1); | 674 BF(29, 30,-COS3_1 , 2); |
676 | 675 |
677 /* pass 5 */ | 676 /* pass 5 */ |
678 BF1(0, 1, 2, 3); | 677 BF1( 0, 1, 2, 3); |
679 BF2(4, 5, 6, 7); | 678 BF2( 4, 5, 6, 7); |
680 BF1(8, 9, 10, 11); | 679 BF1( 8, 9, 10, 11); |
681 BF2(12, 13, 14, 15); | 680 BF2(12, 13, 14, 15); |
682 BF1(16, 17, 18, 19); | 681 BF1(16, 17, 18, 19); |
683 BF2(20, 21, 22, 23); | 682 BF2(20, 21, 22, 23); |
684 BF1(24, 25, 26, 27); | 683 BF1(24, 25, 26, 27); |
685 BF2(28, 29, 30, 31); | 684 BF2(28, 29, 30, 31); |