comparison src/aac/libfaad2/sbr_dct.c @ 3004:8867d3491f60

Sync with FAAD 2.7; will fix warnings in separate commits.
author Tony Vroon <chainsaw@gentoo.org>
date Wed, 08 Apr 2009 20:12:57 +0100
parents f1b6f1b2cdb3
children
comparison
equal deleted inserted replaced
3003:8b7a44631121 3004:8867d3491f60
1 /* 1 /*
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding 2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com 3 ** Copyright (C) 2003-2005 M. Bakker, Nero AG, http://www.nero.com
4 ** 4 **
5 ** This program is free software; you can redistribute it and/or modify 5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by 6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or 7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version. 8 ** (at your option) any later version.
9 ** 9 **
10 ** This program is distributed in the hope that it will be useful, 10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details. 13 ** GNU General Public License for more details.
14 ** 14 **
15 ** You should have received a copy of the GNU General Public License 15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software 16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 ** 18 **
19 ** Any non-GPL usage of this software or parts of this software is strictly 19 ** Any non-GPL usage of this software or parts of this software is strictly
20 ** forbidden. 20 ** forbidden.
21 ** 21 **
22 ** The "appropriate copyright message" mentioned in section 2c of the GPLv2
23 ** must read: "Code from FAAD2 is copyright (c) Nero AG, www.nero.com"
24 **
22 ** Commercial non-GPL licensing of this software is possible. 25 ** Commercial non-GPL licensing of this software is possible.
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. 26 ** For more info contact Nero AG through Mpeg4AAClicense@nero.com.
24 ** 27 **
25 ** $Id: sbr_dct.c,v 1.15 2004/09/04 14:56:28 menno Exp $ 28 ** $Id: sbr_dct.c,v 1.20 2007/11/01 12:33:34 menno Exp $
26 **/ 29 **/
30
31
32 /* Most of the DCT/DST codes here are generated using Spiral which is GPL
33 * For more info see: http://www.spiral.net/
34 */
27 35
28 #include "common.h" 36 #include "common.h"
29 37
30 #ifdef SBR_DEC 38 #ifdef SBR_DEC
31 39
477 f397 = MUL_C(COEF_CONST(1.0708550202783576), f300); 485 f397 = MUL_C(COEF_CONST(1.0708550202783576), f300);
478 y[30] = f395 + f396; 486 y[30] = f395 + f396;
479 y[1] = f397 - f396; 487 y[1] = f397 - f396;
480 } 488 }
481 489
482 #ifdef SBR_LOW_POWER
483
/*
 * DCT2_16_unscaled: fully unrolled, loop-free 16-point transform kernel
 * (a DCT-II going by the name; "unscaled" presumably means the usual
 * normalisation factor is left to the caller -- TODO confirm).
 *
 * x  input; x[0]..x[15] are each read, none is written.
 * y  output; y[0]..y[15] are each stored exactly once.
 *
 * Every load from x happens in the first stage, before the first store to y.
 *
 * MUL_F/FRAC_CONST and MUL_C/COEF_CONST are not defined in this block
 * (presumably they come from common.h -- confirm). In this function MUL_F is
 * only used with constants of magnitude below 1 and MUL_C only with
 * constants above 1.
 *
 * Recurring pattern: a pair (u, v) is combined with three products -- one of
 * u, one of v and one of (u + v) or (u - v) -- the last being shared by both
 * results.
 */
484 void DCT2_16_unscaled(real_t *y, real_t *x)
485 {
486 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
487 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
488 real_t f21, f22, f23, f24, f25, f26, f27, f28, f31, f32;
489 real_t f33, f34, f37, f38, f39, f40, f41, f42, f43, f44;
490 real_t f45, f46, f47, f48, f49, f51, f53, f54, f57, f58;
491 real_t f59, f60, f61, f62, f63, f64, f65, f66, f67, f68;
492 real_t f69, f70, f71, f72, f73, f74, f75, f76, f77, f78;
493 real_t f79, f80, f81, f82, f83, f84, f85, f86, f87, f88;
494 real_t f89, f90, f91, f92, f95, f96, f97, f98, f101, f102;
495 real_t f103, f104, f107, f108, f109, f110;
496
/* Input stage: for each mirrored pair x[i], x[15-i] the even-numbered
 * temporary holds the difference and the odd-numbered one the sum. */
497 f0 = x[0] - x[15];
498 f1 = x[0] + x[15];
499 f2 = x[1] - x[14];
500 f3 = x[1] + x[14];
501 f4 = x[2] - x[13];
502 f5 = x[2] + x[13];
503 f6 = x[3] - x[12];
504 f7 = x[3] + x[12];
505 f8 = x[4] - x[11];
506 f9 = x[4] + x[11];
507 f10 = x[5] - x[10];
508 f11 = x[5] + x[10];
509 f12 = x[6] - x[9];
510 f13 = x[6] + x[9];
511 f14 = x[7] - x[8];
512 f15 = x[7] + x[8];
/* The sums (f1, f3, ..., f15) feed only the even-indexed outputs
 * y[0], y[2], ..., y[14], produced from here up to the store to y[6]. */
513 f16 = f1 - f15;
514 f17 = f1 + f15;
515 f18 = f3 - f13;
516 f19 = f3 + f13;
517 f20 = f5 - f11;
518 f21 = f5 + f11;
519 f22 = f7 - f9;
520 f23 = f7 + f9;
521 f24 = f17 - f23;
522 f25 = f17 + f23;
523 f26 = f19 - f21;
524 f27 = f19 + f21;
525 f28 = f25 - f27;
526 y[0] = f25 + f27;
527 y[8] = MUL_F(f28, FRAC_CONST(0.7071067811865476));
528 f31 = f24 + f26;
529 f32 = MUL_C(f24, COEF_CONST(1.3065629648763766));
530 f33 = MUL_F(f31, FRAC_CONST(-0.9238795325112866));
531 f34 = MUL_F(f26, FRAC_CONST(-0.5411961001461967));
532 y[12] = f32 + f33;
533 y[4] = f34 - f33;
534 f37 = f16 + f22;
535 f38 = MUL_C(f16, COEF_CONST(1.1758756024193588));
536 f39 = MUL_F(f37, FRAC_CONST(-0.9807852804032304));
537 f40 = MUL_F(f22, FRAC_CONST(-0.7856949583871021));
538 f41 = f38 + f39;
539 f42 = f40 - f39;
540 f43 = f18 + f20;
541 f44 = MUL_C(f18, COEF_CONST(1.3870398453221473));
542 f45 = MUL_F(f43, FRAC_CONST(-0.8314696123025455));
543 f46 = MUL_F(f20, FRAC_CONST(-0.2758993792829436));
544 f47 = f44 + f45;
545 f48 = f46 - f45;
546 f49 = f42 - f48;
547 y[2] = f42 + f48;
548 f51 = MUL_F(f49, FRAC_CONST(0.7071067811865476));
549 y[14] = f41 - f47;
550 f53 = f41 + f47;
551 f54 = MUL_F(f53, FRAC_CONST(0.7071067811865476));
552 y[10] = f51 - f54;
553 y[6] = f51 + f54;
/* The differences (f0, f2, ..., f14) feed only the odd-indexed outputs
 * y[1], y[3], ..., y[15], produced from here to the end. */
554 f57 = f2 - f4;
555 f58 = f2 + f4;
556 f59 = f6 - f8;
557 f60 = f6 + f8;
558 f61 = f10 - f12;
559 f62 = f10 + f12;
560 f63 = MUL_F(f60, FRAC_CONST(0.7071067811865476));
561 f64 = f0 - f63;
562 f65 = f0 + f63;
563 f66 = f58 + f62;
564 f67 = MUL_C(f58, COEF_CONST(1.3065629648763766));
565 f68 = MUL_F(f66, FRAC_CONST(-0.9238795325112866));
566 f69 = MUL_F(f62, FRAC_CONST(-0.5411961001461967));
567 f70 = f67 + f68;
568 f71 = f69 - f68;
569 f72 = f65 - f71;
570 f73 = f65 + f71;
571 f74 = f64 - f70;
572 f75 = f64 + f70;
573 f76 = MUL_F(f59, FRAC_CONST(0.7071067811865476));
574 f77 = f14 - f76;
575 f78 = f14 + f76;
576 f79 = f61 + f57;
577 f80 = MUL_C(f61, COEF_CONST(1.3065629648763766));
578 f81 = MUL_F(f79, FRAC_CONST(-0.9238795325112866));
579 f82 = MUL_F(f57, FRAC_CONST(-0.5411961001461967));
580 f83 = f80 + f81;
581 f84 = f82 - f81;
582 f85 = f78 - f84;
583 f86 = f78 + f84;
584 f87 = f77 - f83;
585 f88 = f77 + f83;
586 f89 = f86 + f73;
587 f90 = MUL_F(f86, FRAC_CONST(-0.8971675863426361));
588 f91 = MUL_F(f89, FRAC_CONST(0.9951847266721968));
589 f92 = MUL_C(f73, COEF_CONST(1.0932018670017576));
590 y[1] = f90 + f91;
591 y[15] = f92 - f91;
592 f95 = f75 - f88;
593 f96 = MUL_F(f88, FRAC_CONST(-0.6666556584777466));
594 f97 = MUL_F(f95, FRAC_CONST(0.9569403357322089));
595 f98 = MUL_C(f75, COEF_CONST(1.2472250129866713));
596 y[3] = f97 - f96;
597 y[13] = f98 - f97;
598 f101 = f87 + f74;
599 f102 = MUL_F(f87, FRAC_CONST(-0.4105245275223571));
600 f103 = MUL_F(f101, FRAC_CONST(0.8819212643483549));
601 f104 = MUL_C(f74, COEF_CONST(1.3533180011743529));
602 y[5] = f102 + f103;
603 y[11] = f104 - f103;
604 f107 = f72 - f85;
605 f108 = MUL_F(f85, FRAC_CONST(-0.1386171691990915));
606 f109 = MUL_F(f107, FRAC_CONST(0.7730104533627370));
607 f110 = MUL_C(f72, COEF_CONST(1.4074037375263826));
608 y[7] = f109 - f108;
609 y[9] = f110 - f109;
610 }
611
/*
 * DCT4_16: fully unrolled, loop-free 16-point transform kernel (a DCT-IV
 * going by the name -- TODO confirm scaling convention against callers).
 *
 * x  input; x[0]..x[15] are each read, none is written.
 * y  output; y[0]..y[15] are each stored exactly once.
 *
 * Every load from x happens in the input stage, before the first store to y.
 *
 * MUL_F/FRAC_CONST and MUL_C/COEF_CONST are not defined in this block
 * (presumably they come from common.h -- confirm). Here MUL_F is only used
 * with constants of magnitude below 1 and MUL_C only with constants above 1.
 * The constant is passed as the FIRST macro argument in this function,
 * whereas DCT2_16_unscaled and DCT3_32_unscaled in this file pass it second.
 * NOTE(review): presumably the macros are symmetric in their arguments --
 * confirm.
 */
612 void DCT4_16(real_t *y, real_t *x)
613 {
614 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
615 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
616 real_t f21, f22, f23, f24, f25, f26, f27, f28, f29, f30;
617 real_t f31, f32, f33, f34, f35, f36, f37, f38, f39, f40;
618 real_t f41, f42, f43, f44, f45, f46, f47, f48, f49, f50;
619 real_t f51, f52, f53, f54, f55, f56, f57, f58, f59, f60;
620 real_t f61, f62, f63, f64, f65, f66, f67, f68, f69, f70;
621 real_t f71, f72, f73, f74, f75, f76, f77, f78, f79, f80;
622 real_t f81, f82, f83, f84, f85, f86, f87, f88, f89, f90;
623 real_t f91, f92, f93, f94, f95, f96, f97, f98, f99, f100;
624 real_t f101, f102, f103, f104, f105, f106, f107, f108, f109, f110;
625 real_t f111, f112, f113, f114, f115, f116, f117, f118, f119, f120;
626 real_t f121, f122, f123, f124, f125, f126, f127, f128, f130, f132;
627 real_t f134, f136, f138, f140, f142, f144, f145, f148, f149, f152;
628 real_t f153, f156, f157;
629
/* Input stage: each pair x[2k], x[15-2k] (k = 0..7) is combined with three
 * products -- one of each input and one of their sum, the latter shared by
 * both results. */
630 f0 = x[0] + x[15];
631 f1 = MUL_C(COEF_CONST(1.0478631305325901), x[0]);
632 f2 = MUL_F(FRAC_CONST(-0.9987954562051724), f0);
633 f3 = MUL_F(FRAC_CONST(-0.9497277818777548), x[15]);
634 f4 = f1 + f2;
635 f5 = f3 - f2;
636 f6 = x[2] + x[13];
637 f7 = MUL_C(COEF_CONST(1.2130114330978077), x[2]);
638 f8 = MUL_F(FRAC_CONST(-0.9700312531945440), f6);
639 f9 = MUL_F(FRAC_CONST(-0.7270510732912803), x[13]);
640 f10 = f7 + f8;
641 f11 = f9 - f8;
642 f12 = x[4] + x[11];
643 f13 = MUL_C(COEF_CONST(1.3315443865537255), x[4]);
644 f14 = MUL_F(FRAC_CONST(-0.9039892931234433), f12);
645 f15 = MUL_F(FRAC_CONST(-0.4764341996931612), x[11]);
646 f16 = f13 + f14;
647 f17 = f15 - f14;
648 f18 = x[6] + x[9];
649 f19 = MUL_C(COEF_CONST(1.3989068359730781), x[6]);
650 f20 = MUL_F(FRAC_CONST(-0.8032075314806453), f18);
651 f21 = MUL_F(FRAC_CONST(-0.2075082269882124), x[9]);
652 f22 = f19 + f20;
653 f23 = f21 - f20;
654 f24 = x[8] + x[7];
655 f25 = MUL_C(COEF_CONST(1.4125100802019777), x[8]);
656 f26 = MUL_F(FRAC_CONST(-0.6715589548470187), f24);
657 f27 = MUL_F(FRAC_CONST(0.0693921705079402), x[7]);
658 f28 = f25 + f26;
659 f29 = f27 - f26;
660 f30 = x[10] + x[5];
661 f31 = MUL_C(COEF_CONST(1.3718313541934939), x[10]);
662 f32 = MUL_F(FRAC_CONST(-0.5141027441932219), f30);
663 f33 = MUL_F(FRAC_CONST(0.3436258658070501), x[5]);
664 f34 = f31 + f32;
665 f35 = f33 - f32;
666 f36 = x[12] + x[3];
667 f37 = MUL_C(COEF_CONST(1.2784339185752409), x[12]);
668 f38 = MUL_F(FRAC_CONST(-0.3368898533922200), f36);
669 f39 = MUL_F(FRAC_CONST(0.6046542117908008), x[3]);
670 f40 = f37 + f38;
671 f41 = f39 - f38;
672 f42 = x[14] + x[1];
673 f43 = MUL_C(COEF_CONST(1.1359069844201433), x[14]);
674 f44 = MUL_F(FRAC_CONST(-0.1467304744553624), f42);
675 f45 = MUL_F(FRAC_CONST(0.8424460355094185), x[1]);
676 f46 = f43 + f44;
677 f47 = f45 - f44;
/* Intermediate stages: sum/difference butterflies interleaved with further
 * three-product combinations; no further reads of x from here on. */
678 f48 = f5 - f29;
679 f49 = f5 + f29;
680 f50 = f4 - f28;
681 f51 = f4 + f28;
682 f52 = f11 - f35;
683 f53 = f11 + f35;
684 f54 = f10 - f34;
685 f55 = f10 + f34;
686 f56 = f17 - f41;
687 f57 = f17 + f41;
688 f58 = f16 - f40;
689 f59 = f16 + f40;
690 f60 = f23 - f47;
691 f61 = f23 + f47;
692 f62 = f22 - f46;
693 f63 = f22 + f46;
694 f64 = f48 + f50;
695 f65 = MUL_C(COEF_CONST(1.1758756024193588), f48);
696 f66 = MUL_F(FRAC_CONST(-0.9807852804032304), f64);
697 f67 = MUL_F(FRAC_CONST(-0.7856949583871021), f50);
698 f68 = f65 + f66;
699 f69 = f67 - f66;
700 f70 = f52 + f54;
701 f71 = MUL_C(COEF_CONST(1.3870398453221475), f52);
702 f72 = MUL_F(FRAC_CONST(-0.5555702330196022), f70);
703 f73 = MUL_F(FRAC_CONST(0.2758993792829431), f54);
704 f74 = f71 + f72;
705 f75 = f73 - f72;
706 f76 = f56 + f58;
707 f77 = MUL_F(FRAC_CONST(0.7856949583871022), f56);
708 f78 = MUL_F(FRAC_CONST(0.1950903220161283), f76);
709 f79 = MUL_C(COEF_CONST(1.1758756024193586), f58);
710 f80 = f77 + f78;
711 f81 = f79 - f78;
712 f82 = f60 + f62;
713 f83 = MUL_F(FRAC_CONST(-0.2758993792829430), f60);
714 f84 = MUL_F(FRAC_CONST(0.8314696123025452), f82);
715 f85 = MUL_C(COEF_CONST(1.3870398453221475), f62);
716 f86 = f83 + f84;
717 f87 = f85 - f84;
718 f88 = f49 - f57;
719 f89 = f49 + f57;
720 f90 = f51 - f59;
721 f91 = f51 + f59;
722 f92 = f53 - f61;
723 f93 = f53 + f61;
724 f94 = f55 - f63;
725 f95 = f55 + f63;
726 f96 = f69 - f81;
727 f97 = f69 + f81;
728 f98 = f68 - f80;
729 f99 = f68 + f80;
730 f100 = f75 - f87;
731 f101 = f75 + f87;
732 f102 = f74 - f86;
733 f103 = f74 + f86;
734 f104 = f88 + f90;
735 f105 = MUL_C(COEF_CONST(1.3065629648763766), f88);
736 f106 = MUL_F(FRAC_CONST(-0.9238795325112866), f104);
737 f107 = MUL_F(FRAC_CONST(-0.5411961001461967), f90);
738 f108 = f105 + f106;
739 f109 = f107 - f106;
740 f110 = f92 + f94;
741 f111 = MUL_F(FRAC_CONST(0.5411961001461969), f92);
742 f112 = MUL_F(FRAC_CONST(0.3826834323650898), f110);
743 f113 = MUL_C(COEF_CONST(1.3065629648763766), f94);
744 f114 = f111 + f112;
745 f115 = f113 - f112;
746 f116 = f96 + f98;
747 f117 = MUL_C(COEF_CONST(1.3065629648763766), f96);
748 f118 = MUL_F(FRAC_CONST(-0.9238795325112866), f116);
749 f119 = MUL_F(FRAC_CONST(-0.5411961001461967), f98);
750 f120 = f117 + f118;
751 f121 = f119 - f118;
752 f122 = f100 + f102;
753 f123 = MUL_F(FRAC_CONST(0.5411961001461969), f100);
754 f124 = MUL_F(FRAC_CONST(0.3826834323650898), f122);
755 f125 = MUL_C(COEF_CONST(1.3065629648763766), f102);
756 f126 = f123 + f124;
757 f127 = f125 - f124;
/* Output stage: y[0..3] and y[12..15] are stored as plain sums, while
 * y[4..11] are each multiplied by FRAC_CONST(0.7071067811865474) on store. */
758 f128 = f89 - f93;
759 y[0] = f89 + f93;
760 f130 = f91 - f95;
761 y[15] = f91 + f95;
762 f132 = f109 - f115;
763 y[3] = f109 + f115;
764 f134 = f108 - f114;
765 y[12] = f108 + f114;
766 f136 = f97 - f101;
767 y[1] = f97 + f101;
768 f138 = f99 - f103;
769 y[14] = f99 + f103;
770 f140 = f121 - f127;
771 y[2] = f121 + f127;
772 f142 = f120 - f126;
773 y[13] = f120 + f126;
774 f144 = f128 - f130;
775 f145 = f128 + f130;
776 y[8] = MUL_F(FRAC_CONST(0.7071067811865474), f144);
777 y[7] = MUL_F(FRAC_CONST(0.7071067811865474), f145);
778 f148 = f132 - f134;
779 f149 = f132 + f134;
780 y[11] = MUL_F(FRAC_CONST(0.7071067811865474), f148);
781 y[4] = MUL_F(FRAC_CONST(0.7071067811865474), f149);
782 f152 = f136 - f138;
783 f153 = f136 + f138;
784 y[9] = MUL_F(FRAC_CONST(0.7071067811865474), f152);
785 y[6] = MUL_F(FRAC_CONST(0.7071067811865474), f153);
786 f156 = f140 - f142;
787 f157 = f140 + f142;
788 y[10] = MUL_F(FRAC_CONST(0.7071067811865474), f156);
789 y[5] = MUL_F(FRAC_CONST(0.7071067811865474), f157);
790 }
791
/*
 * DCT3_32_unscaled: fully unrolled, loop-free 32-point transform kernel
 * (a DCT-III going by the name; "unscaled" presumably means the usual
 * normalisation factor is left to the caller -- TODO confirm).
 *
 * x  input; x[0]..x[31] are each read, none is written.
 * y  output; y[0]..y[31] are each stored exactly once.
 *
 * Layout of the body:
 *   1. f0..f112   are computed from the even-indexed inputs only.
 *   2. f113..f272 are computed from the odd-indexed inputs only.
 *   3. The final block stores y[k] = E + O and y[31-k] = E - O, where E is
 *      one of f97..f112 and O is one of the odd-half results.
 * Every load from x happens in parts 1 and 2, before the first store to y.
 *
 * MUL_F/FRAC_CONST and MUL_C/COEF_CONST are not defined in this block
 * (presumably they come from common.h -- confirm). Here MUL_F is only used
 * with constants of magnitude below 1 and MUL_C only with constants above 1.
 */
792 void DCT3_32_unscaled(real_t *y, real_t *x)
793 {
794 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
795 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
796 real_t f21, f22, f23, f24, f25, f26, f27, f28, f29, f30;
797 real_t f31, f32, f33, f34, f35, f36, f37, f38, f39, f40;
798 real_t f41, f42, f43, f44, f45, f46, f47, f48, f49, f50;
799 real_t f51, f52, f53, f54, f55, f56, f57, f58, f59, f60;
800 real_t f61, f62, f63, f64, f65, f66, f67, f68, f69, f70;
801 real_t f71, f72, f73, f74, f75, f76, f77, f78, f79, f80;
802 real_t f81, f82, f83, f84, f85, f86, f87, f88, f89, f90;
803 real_t f91, f92, f93, f94, f95, f96, f97, f98, f99, f100;
804 real_t f101, f102, f103, f104, f105, f106, f107, f108, f109, f110;
805 real_t f111, f112, f113, f114, f115, f116, f117, f118, f119, f120;
806 real_t f121, f122, f123, f124, f125, f126, f127, f128, f129, f130;
807 real_t f131, f132, f133, f134, f135, f136, f137, f138, f139, f140;
808 real_t f141, f142, f143, f144, f145, f146, f147, f148, f149, f150;
809 real_t f151, f152, f153, f154, f155, f156, f157, f158, f159, f160;
810 real_t f161, f162, f163, f164, f165, f166, f167, f168, f169, f170;
811 real_t f171, f172, f173, f174, f175, f176, f177, f178, f179, f180;
812 real_t f181, f182, f183, f184, f185, f186, f187, f188, f189, f190;
813 real_t f191, f192, f193, f194, f195, f196, f197, f198, f199, f200;
814 real_t f201, f202, f203, f204, f205, f206, f207, f208, f209, f210;
815 real_t f211, f212, f213, f214, f215, f216, f217, f218, f219, f220;
816 real_t f221, f222, f223, f224, f225, f226, f227, f228, f229, f230;
817 real_t f231, f232, f233, f234, f235, f236, f237, f238, f239, f240;
818 real_t f241, f242, f243, f244, f245, f246, f247, f248, f249, f250;
819 real_t f251, f252, f253, f254, f255, f256, f257, f258, f259, f260;
820 real_t f261, f262, f263, f264, f265, f266, f267, f268, f269, f270;
821 real_t f271, f272;
822
/* Part 1: even-indexed inputs x[0], x[2], ..., x[30] -> f97..f112. */
823 f0 = MUL_F(x[16], FRAC_CONST(0.7071067811865476));
824 f1 = x[0] - f0;
825 f2 = x[0] + f0;
826 f3 = x[8] + x[24];
827 f4 = MUL_C(x[8], COEF_CONST(1.3065629648763766));
828 f5 = MUL_F(f3, FRAC_CONST((-0.9238795325112866)));
829 f6 = MUL_F(x[24], FRAC_CONST((-0.5411961001461967)));
830 f7 = f4 + f5;
831 f8 = f6 - f5;
832 f9 = f2 - f8;
833 f10 = f2 + f8;
834 f11 = f1 - f7;
835 f12 = f1 + f7;
836 f13 = x[4] + x[28];
837 f14 = MUL_C(x[4], COEF_CONST(1.1758756024193588));
838 f15 = MUL_F(f13, FRAC_CONST((-0.9807852804032304)));
839 f16 = MUL_F(x[28], FRAC_CONST((-0.7856949583871021)));
840 f17 = f14 + f15;
841 f18 = f16 - f15;
842 f19 = x[12] + x[20];
843 f20 = MUL_C(x[12], COEF_CONST(1.3870398453221473));
844 f21 = MUL_F(f19, FRAC_CONST((-0.8314696123025455)));
845 f22 = MUL_F(x[20], FRAC_CONST((-0.2758993792829436)));
846 f23 = f20 + f21;
847 f24 = f22 - f21;
848 f25 = f18 - f24;
849 f26 = f18 + f24;
850 f27 = MUL_F(f25, FRAC_CONST(0.7071067811865476));
851 f28 = f17 - f23;
852 f29 = f17 + f23;
853 f30 = MUL_F(f29, FRAC_CONST(0.7071067811865476));
854 f31 = f27 - f30;
855 f32 = f27 + f30;
856 f33 = f10 - f26;
857 f34 = f10 + f26;
858 f35 = f12 - f32;
859 f36 = f12 + f32;
860 f37 = f11 - f31;
861 f38 = f11 + f31;
862 f39 = f9 - f28;
863 f40 = f9 + f28;
864 f41 = x[2] + x[30];
865 f42 = MUL_C(x[2], COEF_CONST(1.0932018670017569));
866 f43 = MUL_F(f41, FRAC_CONST((-0.9951847266721969)));
867 f44 = MUL_F(x[30], FRAC_CONST((-0.8971675863426368)));
868 f45 = f42 + f43;
869 f46 = f44 - f43;
870 f47 = x[6] + x[26];
871 f48 = MUL_C(x[6], COEF_CONST(1.2472250129866711));
872 f49 = MUL_F(f47, FRAC_CONST((-0.9569403357322089)));
873 f50 = MUL_F(x[26], FRAC_CONST((-0.6666556584777469)));
874 f51 = f48 + f49;
875 f52 = f50 - f49;
876 f53 = x[10] + x[22];
877 f54 = MUL_C(x[10], COEF_CONST(1.3533180011743526));
878 f55 = MUL_F(f53, FRAC_CONST((-0.8819212643483551)));
879 f56 = MUL_F(x[22], FRAC_CONST((-0.4105245275223575)));
880 f57 = f54 + f55;
881 f58 = f56 - f55;
882 f59 = x[14] + x[18];
883 f60 = MUL_C(x[14], COEF_CONST(1.4074037375263826));
884 f61 = MUL_F(f59, FRAC_CONST((-0.7730104533627369)));
885 f62 = MUL_F(x[18], FRAC_CONST((-0.1386171691990913)));
886 f63 = f60 + f61;
887 f64 = f62 - f61;
888 f65 = f46 - f64;
889 f66 = f46 + f64;
890 f67 = f52 - f58;
891 f68 = f52 + f58;
892 f69 = f66 - f68;
893 f70 = f66 + f68;
894 f71 = MUL_F(f69, FRAC_CONST(0.7071067811865476));
895 f72 = f65 + f67;
896 f73 = MUL_C(f65, COEF_CONST(1.3065629648763766));
897 f74 = MUL_F(f72, FRAC_CONST((-0.9238795325112866)));
898 f75 = MUL_F(f67, FRAC_CONST((-0.5411961001461967)));
899 f76 = f73 + f74;
900 f77 = f75 - f74;
901 f78 = f45 - f63;
902 f79 = f45 + f63;
903 f80 = f51 - f57;
904 f81 = f51 + f57;
905 f82 = f79 + f81;
906 f83 = MUL_C(f79, COEF_CONST(1.3065629648763770));
907 f84 = MUL_F(f82, FRAC_CONST((-0.3826834323650904)));
908 f85 = MUL_F(f81, FRAC_CONST(0.5411961001461961));
909 f86 = f83 + f84;
910 f87 = f85 - f84;
911 f88 = f78 - f80;
912 f89 = f78 + f80;
913 f90 = MUL_F(f89, FRAC_CONST(0.7071067811865476));
914 f91 = f77 - f87;
915 f92 = f77 + f87;
916 f93 = f71 - f90;
917 f94 = f71 + f90;
918 f95 = f76 - f86;
919 f96 = f76 + f86;
920 f97 = f34 - f70;
921 f98 = f34 + f70;
922 f99 = f36 - f92;
923 f100 = f36 + f92;
924 f101 = f38 - f91;
925 f102 = f38 + f91;
926 f103 = f40 - f94;
927 f104 = f40 + f94;
928 f105 = f39 - f93;
929 f106 = f39 + f93;
930 f107 = f37 - f96;
931 f108 = f37 + f96;
932 f109 = f35 - f95;
933 f110 = f35 + f95;
934 f111 = f33 - f88;
935 f112 = f33 + f88;
/* Part 2: odd-indexed inputs x[1], x[3], ..., x[31]; the values consumed by
 * the final stores all lie in f242..f272. */
936 f113 = x[1] + x[31];
937 f114 = MUL_C(x[1], COEF_CONST(1.0478631305325901));
938 f115 = MUL_F(f113, FRAC_CONST((-0.9987954562051724)));
939 f116 = MUL_F(x[31], FRAC_CONST((-0.9497277818777548)));
940 f117 = f114 + f115;
941 f118 = f116 - f115;
942 f119 = x[5] + x[27];
943 f120 = MUL_C(x[5], COEF_CONST(1.2130114330978077));
944 f121 = MUL_F(f119, FRAC_CONST((-0.9700312531945440)));
945 f122 = MUL_F(x[27], FRAC_CONST((-0.7270510732912803)));
946 f123 = f120 + f121;
947 f124 = f122 - f121;
948 f125 = x[9] + x[23];
949 f126 = MUL_C(x[9], COEF_CONST(1.3315443865537255));
950 f127 = MUL_F(f125, FRAC_CONST((-0.9039892931234433)));
951 f128 = MUL_F(x[23], FRAC_CONST((-0.4764341996931612)));
952 f129 = f126 + f127;
953 f130 = f128 - f127;
954 f131 = x[13] + x[19];
955 f132 = MUL_C(x[13], COEF_CONST(1.3989068359730781));
956 f133 = MUL_F(f131, FRAC_CONST((-0.8032075314806453)));
957 f134 = MUL_F(x[19], FRAC_CONST((-0.2075082269882124)));
958 f135 = f132 + f133;
959 f136 = f134 - f133;
960 f137 = x[17] + x[15];
961 f138 = MUL_C(x[17], COEF_CONST(1.4125100802019777));
962 f139 = MUL_F(f137, FRAC_CONST((-0.6715589548470187)));
963 f140 = MUL_F(x[15], FRAC_CONST(0.0693921705079402));
964 f141 = f138 + f139;
965 f142 = f140 - f139;
966 f143 = x[21] + x[11];
967 f144 = MUL_C(x[21], COEF_CONST(1.3718313541934939));
968 f145 = MUL_F(f143, FRAC_CONST((-0.5141027441932219)));
969 f146 = MUL_F(x[11], FRAC_CONST(0.3436258658070501));
970 f147 = f144 + f145;
971 f148 = f146 - f145;
972 f149 = x[25] + x[7];
973 f150 = MUL_C(x[25], COEF_CONST(1.2784339185752409));
974 f151 = MUL_F(f149, FRAC_CONST((-0.3368898533922200)));
975 f152 = MUL_F(x[7], FRAC_CONST(0.6046542117908008));
976 f153 = f150 + f151;
977 f154 = f152 - f151;
978 f155 = x[29] + x[3];
979 f156 = MUL_C(x[29], COEF_CONST(1.1359069844201433));
980 f157 = MUL_F(f155, FRAC_CONST((-0.1467304744553624)));
981 f158 = MUL_F(x[3], FRAC_CONST(0.8424460355094185));
982 f159 = f156 + f157;
983 f160 = f158 - f157;
984 f161 = f118 - f142;
985 f162 = f118 + f142;
986 f163 = f117 - f141;
987 f164 = f117 + f141;
988 f165 = f124 - f148;
989 f166 = f124 + f148;
990 f167 = f123 - f147;
991 f168 = f123 + f147;
992 f169 = f130 - f154;
993 f170 = f130 + f154;
994 f171 = f129 - f153;
995 f172 = f129 + f153;
996 f173 = f136 - f160;
997 f174 = f136 + f160;
998 f175 = f135 - f159;
999 f176 = f135 + f159;
1000 f177 = f161 + f163;
1001 f178 = MUL_C(f161, COEF_CONST(1.1758756024193588));
1002 f179 = MUL_F(f177, FRAC_CONST((-0.9807852804032304)));
1003 f180 = MUL_F(f163, FRAC_CONST((-0.7856949583871021)));
1004 f181 = f178 + f179;
1005 f182 = f180 - f179;
1006 f183 = f165 + f167;
1007 f184 = MUL_C(f165, COEF_CONST(1.3870398453221475));
1008 f185 = MUL_F(f183, FRAC_CONST((-0.5555702330196022)));
1009 f186 = MUL_F(f167, FRAC_CONST(0.2758993792829431));
1010 f187 = f184 + f185;
1011 f188 = f186 - f185;
1012 f189 = f169 + f171;
1013 f190 = MUL_F(f169, FRAC_CONST(0.7856949583871022));
1014 f191 = MUL_F(f189, FRAC_CONST(0.1950903220161283));
1015 f192 = MUL_C(f171, COEF_CONST(1.1758756024193586));
1016 f193 = f190 + f191;
1017 f194 = f192 - f191;
1018 f195 = f173 + f175;
1019 f196 = MUL_F(f173, FRAC_CONST((-0.2758993792829430)));
1020 f197 = MUL_F(f195, FRAC_CONST(0.8314696123025452));
1021 f198 = MUL_C(f175, COEF_CONST(1.3870398453221475));
1022 f199 = f196 + f197;
1023 f200 = f198 - f197;
1024 f201 = f162 - f170;
1025 f202 = f162 + f170;
1026 f203 = f164 - f172;
1027 f204 = f164 + f172;
1028 f205 = f166 - f174;
1029 f206 = f166 + f174;
1030 f207 = f168 - f176;
1031 f208 = f168 + f176;
1032 f209 = f182 - f194;
1033 f210 = f182 + f194;
1034 f211 = f181 - f193;
1035 f212 = f181 + f193;
1036 f213 = f188 - f200;
1037 f214 = f188 + f200;
1038 f215 = f187 - f199;
1039 f216 = f187 + f199;
1040 f217 = f201 + f203;
1041 f218 = MUL_C(f201, COEF_CONST(1.3065629648763766));
1042 f219 = MUL_F(f217, FRAC_CONST((-0.9238795325112866)));
1043 f220 = MUL_F(f203, FRAC_CONST((-0.5411961001461967)));
1044 f221 = f218 + f219;
1045 f222 = f220 - f219;
1046 f223 = f205 + f207;
1047 f224 = MUL_F(f205, FRAC_CONST(0.5411961001461969));
1048 f225 = MUL_F(f223, FRAC_CONST(0.3826834323650898));
1049 f226 = MUL_C(f207, COEF_CONST(1.3065629648763766));
1050 f227 = f224 + f225;
1051 f228 = f226 - f225;
1052 f229 = f209 + f211;
1053 f230 = MUL_C(f209, COEF_CONST(1.3065629648763766));
1054 f231 = MUL_F(f229, FRAC_CONST((-0.9238795325112866)));
1055 f232 = MUL_F(f211, FRAC_CONST((-0.5411961001461967)));
1056 f233 = f230 + f231;
1057 f234 = f232 - f231;
1058 f235 = f213 + f215;
1059 f236 = MUL_F(f213, FRAC_CONST(0.5411961001461969));
1060 f237 = MUL_F(f235, FRAC_CONST(0.3826834323650898));
1061 f238 = MUL_C(f215, COEF_CONST(1.3065629648763766));
1062 f239 = f236 + f237;
1063 f240 = f238 - f237;
1064 f241 = f202 - f206;
1065 f242 = f202 + f206;
1066 f243 = f204 - f208;
1067 f244 = f204 + f208;
1068 f245 = f222 - f228;
1069 f246 = f222 + f228;
1070 f247 = f221 - f227;
1071 f248 = f221 + f227;
1072 f249 = f210 - f214;
1073 f250 = f210 + f214;
1074 f251 = f212 - f216;
1075 f252 = f212 + f216;
1076 f253 = f234 - f240;
1077 f254 = f234 + f240;
1078 f255 = f233 - f239;
1079 f256 = f233 + f239;
1080 f257 = f241 - f243;
1081 f258 = f241 + f243;
1082 f259 = MUL_F(f257, FRAC_CONST(0.7071067811865474));
1083 f260 = MUL_F(f258, FRAC_CONST(0.7071067811865474));
1084 f261 = f245 - f247;
1085 f262 = f245 + f247;
1086 f263 = MUL_F(f261, FRAC_CONST(0.7071067811865474));
1087 f264 = MUL_F(f262, FRAC_CONST(0.7071067811865474));
1088 f265 = f249 - f251;
1089 f266 = f249 + f251;
1090 f267 = MUL_F(f265, FRAC_CONST(0.7071067811865474));
1091 f268 = MUL_F(f266, FRAC_CONST(0.7071067811865474));
1092 f269 = f253 - f255;
1093 f270 = f253 + f255;
1094 f271 = MUL_F(f269, FRAC_CONST(0.7071067811865474));
1095 f272 = MUL_F(f270, FRAC_CONST(0.7071067811865474));
/* Part 3: final butterfly. Each even-half value E (f97..f112) is paired with
 * one odd-half value O, giving y[k] = E + O and y[31-k] = E - O. */
1096 y[31] = f98 - f242;
1097 y[0] = f98 + f242;
1098 y[30] = f100 - f250;
1099 y[1] = f100 + f250;
1100 y[29] = f102 - f254;
1101 y[2] = f102 + f254;
1102 y[28] = f104 - f246;
1103 y[3] = f104 + f246;
1104 y[27] = f106 - f264;
1105 y[4] = f106 + f264;
1106 y[26] = f108 - f272;
1107 y[5] = f108 + f272;
1108 y[25] = f110 - f268;
1109 y[6] = f110 + f268;
1110 y[24] = f112 - f260;
1111 y[7] = f112 + f260;
1112 y[23] = f111 - f259;
1113 y[8] = f111 + f259;
1114 y[22] = f109 - f267;
1115 y[9] = f109 + f267;
1116 y[21] = f107 - f271;
1117 y[10] = f107 + f271;
1118 y[20] = f105 - f263;
1119 y[11] = f105 + f263;
1120 y[19] = f103 - f248;
1121 y[12] = f103 + f248;
1122 y[18] = f101 - f256;
1123 y[13] = f101 + f256;
1124 y[17] = f99 - f252;
1125 y[14] = f99 + f252;
1126 y[16] = f97 - f244;
1127 y[15] = f97 + f244;
1128 }
1129
1130 void DCT2_32_unscaled(real_t *y, real_t *x)
1131 {
1132 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
1133 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
1134 real_t f21, f22, f23, f24, f25, f26, f27, f28, f29, f30;
1135 real_t f31, f32, f33, f34, f35, f36, f37, f38, f39, f40;
1136 real_t f41, f42, f43, f44, f45, f46, f47, f48, f49, f50;
1137 real_t f51, f52, f53, f54, f55, f56, f57, f58, f59, f60;
1138 real_t f63, f64, f65, f66, f69, f70, f71, f72, f73, f74;
1139 real_t f75, f76, f77, f78, f79, f80, f81, f83, f85, f86;
1140 real_t f89, f90, f91, f92, f93, f94, f95, f96, f97, f98;
1141 real_t f99, f100, f101, f102, f103, f104, f105, f106, f107, f108;
1142 real_t f109, f110, f111, f112, f113, f114, f115, f116, f117, f118;
1143 real_t f119, f120, f121, f122, f123, f124, f127, f128, f129, f130;
1144 real_t f133, f134, f135, f136, f139, f140, f141, f142, f145, f146;
1145 real_t f147, f148, f149, f150, f151, f152, f153, f154, f155, f156;
1146 real_t f157, f158, f159, f160, f161, f162, f163, f164, f165, f166;
1147 real_t f167, f168, f169, f170, f171, f172, f173, f174, f175, f176;
1148 real_t f177, f178, f179, f180, f181, f182, f183, f184, f185, f186;
1149 real_t f187, f188, f189, f190, f191, f192, f193, f194, f195, f196;
1150 real_t f197, f198, f199, f200, f201, f202, f203, f204, f205, f206;
1151 real_t f207, f208, f209, f210, f211, f212, f213, f214, f215, f216;
1152 real_t f217, f218, f219, f220, f221, f222, f223, f224, f225, f226;
1153 real_t f227, f228, f229, f230, f231, f232, f233, f234, f235, f236;
1154 real_t f237, f238, f239, f240, f241, f242, f243, f244, f247, f248;
1155 real_t f249, f250, f253, f254, f255, f256, f259, f260, f261, f262;
1156 real_t f265, f266, f267, f268, f271, f272, f273, f274, f277, f278;
1157 real_t f279, f280, f283, f284, f285, f286;
1158
1159 f0 = x[0] - x[31];
1160 f1 = x[0] + x[31];
1161 f2 = x[1] - x[30];
1162 f3 = x[1] + x[30];
1163 f4 = x[2] - x[29];
1164 f5 = x[2] + x[29];
1165 f6 = x[3] - x[28];
1166 f7 = x[3] + x[28];
1167 f8 = x[4] - x[27];
1168 f9 = x[4] + x[27];
1169 f10 = x[5] - x[26];
1170 f11 = x[5] + x[26];
1171 f12 = x[6] - x[25];
1172 f13 = x[6] + x[25];
1173 f14 = x[7] - x[24];
1174 f15 = x[7] + x[24];
1175 f16 = x[8] - x[23];
1176 f17 = x[8] + x[23];
1177 f18 = x[9] - x[22];
1178 f19 = x[9] + x[22];
1179 f20 = x[10] - x[21];
1180 f21 = x[10] + x[21];
1181 f22 = x[11] - x[20];
1182 f23 = x[11] + x[20];
1183 f24 = x[12] - x[19];
1184 f25 = x[12] + x[19];
1185 f26 = x[13] - x[18];
1186 f27 = x[13] + x[18];
1187 f28 = x[14] - x[17];
1188 f29 = x[14] + x[17];
1189 f30 = x[15] - x[16];
1190 f31 = x[15] + x[16];
1191 f32 = f1 - f31;
1192 f33 = f1 + f31;
1193 f34 = f3 - f29;
1194 f35 = f3 + f29;
1195 f36 = f5 - f27;
1196 f37 = f5 + f27;
1197 f38 = f7 - f25;
1198 f39 = f7 + f25;
1199 f40 = f9 - f23;
1200 f41 = f9 + f23;
1201 f42 = f11 - f21;
1202 f43 = f11 + f21;
1203 f44 = f13 - f19;
1204 f45 = f13 + f19;
1205 f46 = f15 - f17;
1206 f47 = f15 + f17;
1207 f48 = f33 - f47;
1208 f49 = f33 + f47;
1209 f50 = f35 - f45;
1210 f51 = f35 + f45;
1211 f52 = f37 - f43;
1212 f53 = f37 + f43;
1213 f54 = f39 - f41;
1214 f55 = f39 + f41;
1215 f56 = f49 - f55;
1216 f57 = f49 + f55;
1217 f58 = f51 - f53;
1218 f59 = f51 + f53;
1219 f60 = f57 - f59;
1220 y[0] = f57 + f59;
1221 y[16] = MUL_F(FRAC_CONST(0.7071067811865476), f60);
1222 f63 = f56 + f58;
1223 f64 = MUL_C(COEF_CONST(1.3065629648763766), f56);
1224 f65 = MUL_F(FRAC_CONST(-0.9238795325112866), f63);
1225 f66 = MUL_F(FRAC_CONST(-0.5411961001461967), f58);
1226 y[24] = f64 + f65;
1227 y[8] = f66 - f65;
1228 f69 = f48 + f54;
1229 f70 = MUL_C(COEF_CONST(1.1758756024193588), f48);
1230 f71 = MUL_F(FRAC_CONST(-0.9807852804032304), f69);
1231 f72 = MUL_F(FRAC_CONST(-0.7856949583871021), f54);
1232 f73 = f70 + f71;
1233 f74 = f72 - f71;
1234 f75 = f50 + f52;
1235 f76 = MUL_C(COEF_CONST(1.3870398453221473), f50);
1236 f77 = MUL_F(FRAC_CONST(-0.8314696123025455), f75);
1237 f78 = MUL_F(FRAC_CONST(-0.2758993792829436), f52);
1238 f79 = f76 + f77;
1239 f80 = f78 - f77;
1240 f81 = f74 - f80;
1241 y[4] = f74 + f80;
1242 f83 = MUL_F(FRAC_CONST(0.7071067811865476), f81);
1243 y[28] = f73 - f79;
1244 f85 = f73 + f79;
1245 f86 = MUL_F(FRAC_CONST(0.7071067811865476), f85);
1246 y[20] = f83 - f86;
1247 y[12] = f83 + f86;
1248 f89 = f34 - f36;
1249 f90 = f34 + f36;
1250 f91 = f38 - f40;
1251 f92 = f38 + f40;
1252 f93 = f42 - f44;
1253 f94 = f42 + f44;
1254 f95 = MUL_F(FRAC_CONST(0.7071067811865476), f92);
1255 f96 = f32 - f95;
1256 f97 = f32 + f95;
1257 f98 = f90 + f94;
1258 f99 = MUL_C(COEF_CONST(1.3065629648763766), f90);
1259 f100 = MUL_F(FRAC_CONST(-0.9238795325112866), f98);
1260 f101 = MUL_F(FRAC_CONST(-0.5411961001461967), f94);
1261 f102 = f99 + f100;
1262 f103 = f101 - f100;
1263 f104 = f97 - f103;
1264 f105 = f97 + f103;
1265 f106 = f96 - f102;
1266 f107 = f96 + f102;
1267 f108 = MUL_F(FRAC_CONST(0.7071067811865476), f91);
1268 f109 = f46 - f108;
1269 f110 = f46 + f108;
1270 f111 = f93 + f89;
1271 f112 = MUL_C(COEF_CONST(1.3065629648763766), f93);
1272 f113 = MUL_F(FRAC_CONST(-0.9238795325112866), f111);
1273 f114 = MUL_F(FRAC_CONST(-0.5411961001461967), f89);
1274 f115 = f112 + f113;
1275 f116 = f114 - f113;
1276 f117 = f110 - f116;
1277 f118 = f110 + f116;
1278 f119 = f109 - f115;
1279 f120 = f109 + f115;
1280 f121 = f118 + f105;
1281 f122 = MUL_F(FRAC_CONST(-0.8971675863426361), f118);
1282 f123 = MUL_F(FRAC_CONST(0.9951847266721968), f121);
1283 f124 = MUL_C(COEF_CONST(1.0932018670017576), f105);
1284 y[2] = f122 + f123;
1285 y[30] = f124 - f123;
1286 f127 = f107 - f120;
1287 f128 = MUL_F(FRAC_CONST(-0.6666556584777466), f120);
1288 f129 = MUL_F(FRAC_CONST(0.9569403357322089), f127);
1289 f130 = MUL_C(COEF_CONST(1.2472250129866713), f107);
1290 y[6] = f129 - f128;
1291 y[26] = f130 - f129;
1292 f133 = f119 + f106;
1293 f134 = MUL_F(FRAC_CONST(-0.4105245275223571), f119);
1294 f135 = MUL_F(FRAC_CONST(0.8819212643483549), f133);
1295 f136 = MUL_C(COEF_CONST(1.3533180011743529), f106);
1296 y[10] = f134 + f135;
1297 y[22] = f136 - f135;
1298 f139 = f104 - f117;
1299 f140 = MUL_F(FRAC_CONST(-0.1386171691990915), f117);
1300 f141 = MUL_F(FRAC_CONST(0.7730104533627370), f139);
1301 f142 = MUL_C(COEF_CONST(1.4074037375263826), f104);
1302 y[14] = f141 - f140;
1303 y[18] = f142 - f141;
1304 f145 = f2 - f4;
1305 f146 = f2 + f4;
1306 f147 = f6 - f8;
1307 f148 = f6 + f8;
1308 f149 = f10 - f12;
1309 f150 = f10 + f12;
1310 f151 = f14 - f16;
1311 f152 = f14 + f16;
1312 f153 = f18 - f20;
1313 f154 = f18 + f20;
1314 f155 = f22 - f24;
1315 f156 = f22 + f24;
1316 f157 = f26 - f28;
1317 f158 = f26 + f28;
1318 f159 = MUL_F(FRAC_CONST(0.7071067811865476), f152);
1319 f160 = f0 - f159;
1320 f161 = f0 + f159;
1321 f162 = f148 + f156;
1322 f163 = MUL_C(COEF_CONST(1.3065629648763766), f148);
1323 f164 = MUL_F(FRAC_CONST(-0.9238795325112866), f162);
1324 f165 = MUL_F(FRAC_CONST(-0.5411961001461967), f156);
1325 f166 = f163 + f164;
1326 f167 = f165 - f164;
1327 f168 = f161 - f167;
1328 f169 = f161 + f167;
1329 f170 = f160 - f166;
1330 f171 = f160 + f166;
1331 f172 = f146 + f158;
1332 f173 = MUL_C(COEF_CONST(1.1758756024193588), f146);
1333 f174 = MUL_F(FRAC_CONST(-0.9807852804032304), f172);
1334 f175 = MUL_F(FRAC_CONST(-0.7856949583871021), f158);
1335 f176 = f173 + f174;
1336 f177 = f175 - f174;
1337 f178 = f150 + f154;
1338 f179 = MUL_C(COEF_CONST(1.3870398453221473), f150);
1339 f180 = MUL_F(FRAC_CONST(-0.8314696123025455), f178);
1340 f181 = MUL_F(FRAC_CONST(-0.2758993792829436), f154);
1341 f182 = f179 + f180;
1342 f183 = f181 - f180;
1343 f184 = f177 - f183;
1344 f185 = f177 + f183;
1345 f186 = MUL_F(FRAC_CONST(0.7071067811865476), f184);
1346 f187 = f176 - f182;
1347 f188 = f176 + f182;
1348 f189 = MUL_F(FRAC_CONST(0.7071067811865476), f188);
1349 f190 = f186 - f189;
1350 f191 = f186 + f189;
1351 f192 = f169 - f185;
1352 f193 = f169 + f185;
1353 f194 = f171 - f191;
1354 f195 = f171 + f191;
1355 f196 = f170 - f190;
1356 f197 = f170 + f190;
1357 f198 = f168 - f187;
1358 f199 = f168 + f187;
1359 f200 = MUL_F(FRAC_CONST(0.7071067811865476), f151);
1360 f201 = f30 - f200;
1361 f202 = f30 + f200;
1362 f203 = f155 + f147;
1363 f204 = MUL_C(COEF_CONST(1.3065629648763766), f155);
1364 f205 = MUL_F(FRAC_CONST(-0.9238795325112866), f203);
1365 f206 = MUL_F(FRAC_CONST(-0.5411961001461967), f147);
1366 f207 = f204 + f205;
1367 f208 = f206 - f205;
1368 f209 = f202 - f208;
1369 f210 = f202 + f208;
1370 f211 = f201 - f207;
1371 f212 = f201 + f207;
1372 f213 = f157 + f145;
1373 f214 = MUL_C(COEF_CONST(1.1758756024193588), f157);
1374 f215 = MUL_F(FRAC_CONST(-0.9807852804032304), f213);
1375 f216 = MUL_F(FRAC_CONST(-0.7856949583871021), f145);
1376 f217 = f214 + f215;
1377 f218 = f216 - f215;
1378 f219 = f153 + f149;
1379 f220 = MUL_C(COEF_CONST(1.3870398453221473), f153);
1380 f221 = MUL_F(FRAC_CONST(-0.8314696123025455), f219);
1381 f222 = MUL_F(FRAC_CONST(-0.2758993792829436), f149);
1382 f223 = f220 + f221;
1383 f224 = f222 - f221;
1384 f225 = f218 - f224;
1385 f226 = f218 + f224;
1386 f227 = MUL_F(FRAC_CONST(0.7071067811865476), f225);
1387 f228 = f217 - f223;
1388 f229 = f217 + f223;
1389 f230 = MUL_F(FRAC_CONST(0.7071067811865476), f229);
1390 f231 = f227 - f230;
1391 f232 = f227 + f230;
1392 f233 = f210 - f226;
1393 f234 = f210 + f226;
1394 f235 = f212 - f232;
1395 f236 = f212 + f232;
1396 f237 = f211 - f231;
1397 f238 = f211 + f231;
1398 f239 = f209 - f228;
1399 f240 = f209 + f228;
1400 f241 = f234 + f193;
1401 f242 = MUL_F(FRAC_CONST(-0.9497277818777543), f234);
1402 f243 = MUL_F(FRAC_CONST(0.9987954562051724), f241);
1403 f244 = MUL_C(COEF_CONST(1.0478631305325905), f193);
1404 y[1] = f242 + f243;
1405 y[31] = f244 - f243;
1406 f247 = f195 - f236;
1407 f248 = MUL_F(FRAC_CONST(-0.8424460355094192), f236);
1408 f249 = MUL_F(FRAC_CONST(0.9891765099647810), f247);
1409 f250 = MUL_C(COEF_CONST(1.1359069844201428), f195);
1410 y[3] = f249 - f248;
1411 y[29] = f250 - f249;
1412 f253 = f238 + f197;
1413 f254 = MUL_F(FRAC_CONST(-0.7270510732912801), f238);
1414 f255 = MUL_F(FRAC_CONST(0.9700312531945440), f253);
1415 f256 = MUL_C(COEF_CONST(1.2130114330978079), f197);
1416 y[5] = f254 + f255;
1417 y[27] = f256 - f255;
1418 f259 = f199 - f240;
1419 f260 = MUL_F(FRAC_CONST(-0.6046542117908007), f240);
1420 f261 = MUL_F(FRAC_CONST(0.9415440651830208), f259);
1421 f262 = MUL_C(COEF_CONST(1.2784339185752409), f199);
1422 y[7] = f261 - f260;
1423 y[25] = f262 - f261;
1424 f265 = f239 + f198;
1425 f266 = MUL_F(FRAC_CONST(-0.4764341996931611), f239);
1426 f267 = MUL_F(FRAC_CONST(0.9039892931234433), f265);
1427 f268 = MUL_C(COEF_CONST(1.3315443865537255), f198);
1428 y[9] = f266 + f267;
1429 y[23] = f268 - f267;
1430 f271 = f196 - f237;
1431 f272 = MUL_F(FRAC_CONST(-0.3436258658070505), f237);
1432 f273 = MUL_F(FRAC_CONST(0.8577286100002721), f271);
1433 f274 = MUL_C(COEF_CONST(1.3718313541934939), f196);
1434 y[11] = f273 - f272;
1435 y[21] = f274 - f273;
1436 f277 = f235 + f194;
1437 f278 = MUL_F(FRAC_CONST(-0.2075082269882114), f235);
1438 f279 = MUL_F(FRAC_CONST(0.8032075314806448), f277);
1439 f280 = MUL_C(COEF_CONST(1.3989068359730783), f194);
1440 y[13] = f278 + f279;
1441 y[19] = f280 - f279;
1442 f283 = f192 - f233;
1443 f284 = MUL_F(FRAC_CONST(-0.0693921705079408), f233);
1444 f285 = MUL_F(FRAC_CONST(0.7409511253549591), f283);
1445 f286 = MUL_C(COEF_CONST(1.4125100802019774), f192);
1446 y[15] = f285 - f284;
1447 y[17] = f286 - f285;
1448 }
1449
1450 #else
1451
1452
// Transform length (n) and its base-2 logarithm for the 32-point FFT below.
// fft_dif uses n as the upper loop bound of stages 3-5; both macros are
// removed again by the #undef lines that follow fft_dif.
// NOTE(review): log2n is not referenced in the visible body of fft_dif.
1453 #define n 32
1454 #define log2n 5
1455
// Real parts of the FFT twiddle factors: 16 entries, i = 0..15.
// fft_dif reads entry i in stage 1, the even entries (0, 2, ..., 14) in
// stage 2, and entries 4 and 12 in stage 3.
// NOTE(review): the literals match the closed form below only to about
// 7 decimal places (e.g. entry 9) - confirm this precision is intended.
1456 // w_array_real[i] = cos(2*M_PI*i/32)
1457 static const real_t w_array_real[] = {
1458 FRAC_CONST(1.000000000000000), FRAC_CONST(0.980785279337272),
1459 FRAC_CONST(0.923879528329380), FRAC_CONST(0.831469603195765),
1460 FRAC_CONST(0.707106765732237), FRAC_CONST(0.555570210304169),
1461 FRAC_CONST(0.382683402077046), FRAC_CONST(0.195090284503576),
1462 FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090370246552),
1463 FRAC_CONST(-0.382683482845162), FRAC_CONST(-0.555570282993553),
1464 FRAC_CONST(-0.707106827549476), FRAC_CONST(-0.831469651765257),
1465 FRAC_CONST(-0.923879561784627), FRAC_CONST(-0.980785296392607)
1466 };
1467
// Imaginary parts of the FFT twiddle factors: 16 entries, i = 0..15.
// fft_dif reads entry i in stage 1 and the even entries (0, 2, ..., 14) in
// stage 2; stage 3 derives the imaginary part from w_array_real instead.
1468 // w_array_imag[i] = sin(-2*M_PI*i/32)
1469 static const real_t w_array_imag[] = {
1470 FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090327375064),
1471 FRAC_CONST(-0.382683442461104), FRAC_CONST(-0.555570246648862),
1472 FRAC_CONST(-0.707106796640858), FRAC_CONST(-0.831469627480512),
1473 FRAC_CONST(-0.923879545057005), FRAC_CONST(-0.980785287864940),
1474 FRAC_CONST(-1.000000000000000), FRAC_CONST(-0.980785270809601),
1475 FRAC_CONST(-0.923879511601754), FRAC_CONST(-0.831469578911016),
1476 FRAC_CONST(-0.707106734823616), FRAC_CONST(-0.555570173959476),
1477 FRAC_CONST(-0.382683361692986), FRAC_CONST(-0.195090241632088)
1478 };
1479
/*
** fft_dif: in-place 32-point complex FFT, written out as five
** decimation-in-frequency stages.
**
**   Real, Imag - real and imaginary parts of the 32 points. Indices 0..31
**                of both arrays are read and overwritten.
**
** Each butterfly stores the sum of a pair at the lower index and the
** difference, multiplied by a twiddle factor, at the upper index. The pair
** distance halves each stage (16, 8, 4, 2, 1). No reordering is done here
** unless REORDER_IN_FFT is defined, in which case FFTReorder() is called at
** the end; dct4_kernel treats the result as being in bit-reversed order.
*/
1480 // FFT decimation in frequency
1481 // 4*16*2+16=128+16=144 multiplications
1482 // 6*16*2+10*8+4*16*2=192+80+128=400 additions
1483 static void fft_dif(real_t * Real, real_t * Imag)
1484 {
1485 real_t w_real, w_imag; // For faster access
1486 real_t point1_real, point1_imag, point2_real, point2_imag; // For faster access
1487 uint32_t j, i, i2, w_index; // Counters
1488
1489 // First 2 stages of 32 point FFT decimation in frequency
1490 // 4*16*2=64*2=128 multiplications
1491 // 6*16*2=96*2=192 additions
1492 // Stage 1 of 32 point FFT decimation in frequency
// Pairs point i with point i+16; twiddle factor is table entry i.
1493 for (i = 0; i < 16; i++)
1494 {
1495 point1_real = Real[i];
1496 point1_imag = Imag[i];
1497 i2 = i+16;
1498 point2_real = Real[i2];
1499 point2_imag = Imag[i2];
1500
1501 w_real = w_array_real[i];
1502 w_imag = w_array_imag[i];
1503
1504 // temp1 = x[i] - x[i2]
1505 point1_real -= point2_real;
1506 point1_imag -= point2_imag;
1507
1508 // x[i1] = x[i] + x[i2]
1509 Real[i] += point2_real;
1510 Imag[i] += point2_imag;
1511
1512 // x[i2] = (x[i] - x[i2]) * w
1513 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1514 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1515 }
1516 // Stage 2 of 32 point FFT decimation in frequency
// Pair distance 8; only every second twiddle entry is used (w_index += 2).
// The same twiddle serves the butterfly in the lower half (i = j) and the
// one in the upper half (i = j+16).
1517 for (j = 0, w_index = 0; j < 8; j++, w_index += 2)
1518 {
1519 w_real = w_array_real[w_index];
1520 w_imag = w_array_imag[w_index];
1521
1522 i = j;
1523 point1_real = Real[i];
1524 point1_imag = Imag[i];
1525 i2 = i+8;
1526 point2_real = Real[i2];
1527 point2_imag = Imag[i2];
1528
1529 // temp1 = x[i] - x[i2]
1530 point1_real -= point2_real;
1531 point1_imag -= point2_imag;
1532
1533 // x[i1] = x[i] + x[i2]
1534 Real[i] += point2_real;
1535 Imag[i] += point2_imag;
1536
1537 // x[i2] = (x[i] - x[i2]) * w
1538 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1539 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1540
1541 i = j+16;
1542 point1_real = Real[i];
1543 point1_imag = Imag[i];
1544 i2 = i+8;
1545 point2_real = Real[i2];
1546 point2_imag = Imag[i2];
1547
1548 // temp1 = x[i] - x[i2]
1549 point1_real -= point2_real;
1550 point1_imag -= point2_imag;
1551
1552 // x[i1] = x[i] + x[i2]
1553 Real[i] += point2_real;
1554 Imag[i] += point2_imag;
1555
1556 // x[i2] = (x[i] - x[i2]) * w
1557 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1558 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1559 }
1560
1561 // Stage 3 of 32 point FFT decimation in frequency
1562 // 2*4*2=16 multiplications
1563 // 4*4*2+6*4*2=10*8=80 additions
// Pair distance 4. The four loops below handle i mod 8 == 0, 1, 2, 3, whose
// twiddles are 1, entry 4, -i and entry 12 respectively; each case is
// written out separately so that only entries 4 and 12 need multiplications.
1564 for (i = 0; i < n; i += 8)
1565 {
1566 i2 = i+4;
1567 point1_real = Real[i];
1568 point1_imag = Imag[i];
1569
1570 point2_real = Real[i2];
1571 point2_imag = Imag[i2];
1572
1573 // out[i1] = point1 + point2
1574 Real[i] += point2_real;
1575 Imag[i] += point2_imag;
1576
1577 // out[i2] = point1 - point2
1578 Real[i2] = point1_real - point2_real;
1579 Imag[i2] = point1_imag - point2_imag;
1580 }
1581 w_real = w_array_real[4]; // = sqrt(2)/2
1582 // w_imag = -w_real; // = w_array_imag[4]; // = -sqrt(2)/2
// Since w_imag == -w_real here, the complex product collapses to one
// multiplication per component: (re+im)*w_real and (im-re)*w_real.
1583 for (i = 1; i < n; i += 8)
1584 {
1585 i2 = i+4;
1586 point1_real = Real[i];
1587 point1_imag = Imag[i];
1588
1589 point2_real = Real[i2];
1590 point2_imag = Imag[i2];
1591
1592 // temp1 = x[i] - x[i2]
1593 point1_real -= point2_real;
1594 point1_imag -= point2_imag;
1595
1596 // x[i1] = x[i] + x[i2]
1597 Real[i] += point2_real;
1598 Imag[i] += point2_imag;
1599
1600 // x[i2] = (x[i] - x[i2]) * w
1601 Real[i2] = MUL_F(point1_real+point1_imag, w_real);
1602 Imag[i2] = MUL_F(point1_imag-point1_real, w_real);
1603 }
1604 for (i = 2; i < n; i += 8)
1605 {
1606 i2 = i+4;
1607 point1_real = Real[i];
1608 point1_imag = Imag[i];
1609
1610 point2_real = Real[i2];
1611 point2_imag = Imag[i2];
1612
1613 // x[i] = x[i] + x[i2]
1614 Real[i] += point2_real;
1615 Imag[i] += point2_imag;
1616
1617 // x[i2] = (x[i] - x[i2]) * (-i)
// Multiplying by -i swaps the parts: new real = imaginary difference,
// new imaginary = negated real difference.
1618 Real[i2] = point1_imag - point2_imag;
1619 Imag[i2] = point2_real - point1_real;
1620 }
1621 w_real = w_array_real[12]; // = -sqrt(2)/2
1622 // w_imag = w_real; // = w_array_imag[12]; // = -sqrt(2)/2
// Since w_imag == w_real here, the complex product collapses to
// (re-im)*w_real and (re+im)*w_real.
1623 for (i = 3; i < n; i += 8)
1624 {
1625 i2 = i+4;
1626 point1_real = Real[i];
1627 point1_imag = Imag[i];
1628
1629 point2_real = Real[i2];
1630 point2_imag = Imag[i2];
1631
1632 // temp1 = x[i] - x[i2]
1633 point1_real -= point2_real;
1634 point1_imag -= point2_imag;
1635
1636 // x[i1] = x[i] + x[i2]
1637 Real[i] += point2_real;
1638 Imag[i] += point2_imag;
1639
1640 // x[i2] = (x[i] - x[i2]) * w
1641 Real[i2] = MUL_F(point1_real-point1_imag, w_real);
1642 Imag[i2] = MUL_F(point1_real+point1_imag, w_real);
1643 }
1644
1645
1646 // Stage 4 of 32 point FFT decimation in frequency (no multiplications)
1647 // 16*4=64 additions
// Pair distance 2. Even i: twiddle 1 (plain difference); odd i: twiddle -i.
1648 for (i = 0; i < n; i += 4)
1649 {
1650 i2 = i+2;
1651 point1_real = Real[i];
1652 point1_imag = Imag[i];
1653
1654 point2_real = Real[i2];
1655 point2_imag = Imag[i2];
1656
1657 // x[i1] = x[i] + x[i2]
1658 Real[i] += point2_real;
1659 Imag[i] += point2_imag;
1660
1661 // x[i2] = x[i] - x[i2]
1662 Real[i2] = point1_real - point2_real;
1663 Imag[i2] = point1_imag - point2_imag;
1664 }
1665 for (i = 1; i < n; i += 4)
1666 {
1667 i2 = i+2;
1668 point1_real = Real[i];
1669 point1_imag = Imag[i];
1670
1671 point2_real = Real[i2];
1672 point2_imag = Imag[i2];
1673
1674 // x[i] = x[i] + x[i2]
1675 Real[i] += point2_real;
1676 Imag[i] += point2_imag;
1677
1678 // x[i2] = (x[i] - x[i2]) * (-i)
1679 Real[i2] = point1_imag - point2_imag;
1680 Imag[i2] = point2_real - point1_real;
1681 }
1682
1683 // Stage 5 of 32 point FFT decimation in frequency (no multiplications)
1684 // 16*4=64 additions
// Pair distance 1: sum and difference of adjacent points, no twiddle.
1685 for (i = 0; i < n; i += 2)
1686 {
1687 i2 = i+1;
1688 point1_real = Real[i];
1689 point1_imag = Imag[i];
1690
1691 point2_real = Real[i2];
1692 point2_imag = Imag[i2];
1693
1694 // out[i1] = point1 + point2
1695 Real[i] += point2_real;
1696 Imag[i] += point2_imag;
1697
1698 // out[i2] = point1 - point2
1699 Real[i2] = point1_real - point2_real;
1700 Imag[i2] = point1_imag - point2_imag;
1701 }
1702
// Optional reordering pass; FFTReorder is not defined in this part of the
// file. NOTE(review): presumably it undoes the bit-reversed output order -
// confirm against its definition.
1703 #ifdef REORDER_IN_FFT
1704 FFTReorder(Real, Imag);
1705 #endif // #ifdef REORDER_IN_FFT
1706 }
// The FFT-size macros are only meant for fft_dif; drop them so the short
// names n and log2n do not leak into the rest of the file.
1707 #undef n
1708 #undef log2n
1709
/*
** Modulation coefficients for dct4_kernel: 192 entries laid out as six
** consecutive groups of 32.
**
**   [  0.. 31]  step 2, multiplies (x_re + x_im)
**   [ 32.. 63]  step 2, multiplies x_re (added to form the imaginary part)
**   [ 64.. 95]  step 2, multiplies x_im (added to form the real part)
**   [ 96..127]  step 4, multiplies (x_re + x_im)
**   [128..159]  step 4, multiplies x_re (added to form the imaginary part)
**   [160..191]  step 4, multiplies x_im (added to form the real part)
**
** Entries [144] and [176] are written as COEF_CONST(0). dct4_kernel handles
** i == 16 with dedicated code that reads only entry [16 + 3*32], so those
** two slots are never used.
*/
1710 static const real_t dct4_64_tab[] = {
// Group 0: indices 0..31
1711 COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507),
1712 COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537),
1713 COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708),
1714 COEF_CONST(0.953306019306183), COEF_CONST(0.937339007854462),
1715 COEF_CONST(0.919113874435425), COEF_CONST(0.898674488067627),
1716 COEF_CONST(0.876070082187653), COEF_CONST(0.851355195045471),
1717 COEF_CONST(0.824589252471924), COEF_CONST(0.795836925506592),
1718 COEF_CONST(0.765167236328125), COEF_CONST(0.732654273509979),
1719 COEF_CONST(0.698376238346100), COEF_CONST(0.662415742874146),
1720 COEF_CONST(0.624859452247620), COEF_CONST(0.585797846317291),
1721 COEF_CONST(0.545324981212616), COEF_CONST(0.503538429737091),
1722 COEF_CONST(0.460538715124130), COEF_CONST(0.416429549455643),
1723 COEF_CONST(0.371317148208618), COEF_CONST(0.325310230255127),
1724 COEF_CONST(0.278519600629807), COEF_CONST(0.231058135628700),
1725 COEF_CONST(0.183039888739586), COEF_CONST(0.134580686688423),
1726 COEF_CONST(0.085797272622585), COEF_CONST(0.036807164549828),
// Group 1: indices 32..63
1727 COEF_CONST(-1.012196302413940), COEF_CONST(-1.059438824653626),
1728 COEF_CONST(-1.104129195213318), COEF_CONST(-1.146159529685974),
1729 COEF_CONST(-1.185428738594055), COEF_CONST(-1.221842169761658),
1730 COEF_CONST(-1.255311965942383), COEF_CONST(-1.285757660865784),
1731 COEF_CONST(-1.313105940818787), COEF_CONST(-1.337290763854981),
1732 COEF_CONST(-1.358253836631775), COEF_CONST(-1.375944852828980),
1733 COEF_CONST(-1.390321016311646), COEF_CONST(-1.401347875595093),
1734 COEF_CONST(-1.408998727798462), COEF_CONST(-1.413255214691162),
1735 COEF_CONST(-1.414107084274292), COEF_CONST(-1.411552190780640),
1736 COEF_CONST(-1.405596733093262), COEF_CONST(-1.396255016326904),
1737 COEF_CONST(-1.383549690246582), COEF_CONST(-1.367511272430420),
1738 COEF_CONST(-1.348178386688232), COEF_CONST(-1.325597524642944),
1739 COEF_CONST(-1.299823284149170), COEF_CONST(-1.270917654037476),
1740 COEF_CONST(-1.238950133323669), COEF_CONST(-1.203998088836670),
1741 COEF_CONST(-1.166145324707031), COEF_CONST(-1.125483393669128),
1742 COEF_CONST(-1.082109928131104), COEF_CONST(-1.036129593849182),
// Group 2: indices 64..95
1743 COEF_CONST(-0.987653195858002), COEF_CONST(-0.936797380447388),
1744 COEF_CONST(-0.883684754371643), COEF_CONST(-0.828443288803101),
1745 COEF_CONST(-0.771206021308899), COEF_CONST(-0.712110757827759),
1746 COEF_CONST(-0.651300072669983), COEF_CONST(-0.588920354843140),
1747 COEF_CONST(-0.525121808052063), COEF_CONST(-0.460058242082596),
1748 COEF_CONST(-0.393886327743530), COEF_CONST(-0.326765477657318),
1749 COEF_CONST(-0.258857429027557), COEF_CONST(-0.190325915813446),
1750 COEF_CONST(-0.121335685253143), COEF_CONST(-0.052053272724152),
1751 COEF_CONST(0.017354607582092), COEF_CONST(0.086720645427704),
1752 COEF_CONST(0.155877828598022), COEF_CONST(0.224659323692322),
1753 COEF_CONST(0.292899727821350), COEF_CONST(0.360434412956238),
1754 COEF_CONST(0.427100926637650), COEF_CONST(0.492738455533981),
1755 COEF_CONST(0.557188928127289), COEF_CONST(0.620297133922577),
1756 COEF_CONST(0.681910991668701), COEF_CONST(0.741881847381592),
1757 COEF_CONST(0.800065577030182), COEF_CONST(0.856321990489960),
1758 COEF_CONST(0.910515367984772), COEF_CONST(0.962515234947205),
// Group 3: indices 96..127
1759 COEF_CONST(1.000000000000000), COEF_CONST(0.998795449733734),
1760 COEF_CONST(0.995184719562531), COEF_CONST(0.989176511764526),
1761 COEF_CONST(0.980785250663757), COEF_CONST(0.970031261444092),
1762 COEF_CONST(0.956940352916718), COEF_CONST(0.941544055938721),
1763 COEF_CONST(0.923879504203796), COEF_CONST(0.903989315032959),
1764 COEF_CONST(0.881921231746674), COEF_CONST(0.857728600502014),
1765 COEF_CONST(0.831469595432281), COEF_CONST(0.803207516670227),
1766 COEF_CONST(0.773010432720184), COEF_CONST(0.740951120853424),
1767 COEF_CONST(0.707106769084930), COEF_CONST(0.671558916568756),
1768 COEF_CONST(0.634393274784088), COEF_CONST(0.595699310302734),
1769 COEF_CONST(0.555570185184479), COEF_CONST(0.514102697372437),
1770 COEF_CONST(0.471396654844284), COEF_CONST(0.427555114030838),
1771 COEF_CONST(0.382683426141739), COEF_CONST(0.336889833211899),
1772 COEF_CONST(0.290284633636475), COEF_CONST(0.242980122566223),
1773 COEF_CONST(0.195090234279633), COEF_CONST(0.146730497479439),
1774 COEF_CONST(0.098017133772373), COEF_CONST(0.049067649990320),
// Group 4: indices 128..159 (index 144 is the unused zero slot)
1775 COEF_CONST(-1.000000000000000), COEF_CONST(-1.047863125801086),
1776 COEF_CONST(-1.093201875686646), COEF_CONST(-1.135906934738159),
1777 COEF_CONST(-1.175875544548035), COEF_CONST(-1.213011503219605),
1778 COEF_CONST(-1.247225046157837), COEF_CONST(-1.278433918952942),
1779 COEF_CONST(-1.306562900543213), COEF_CONST(-1.331544399261475),
1780 COEF_CONST(-1.353317975997925), COEF_CONST(-1.371831417083740),
1781 COEF_CONST(-1.387039899826050), COEF_CONST(-1.398906826972961),
1782 COEF_CONST(-1.407403707504273), COEF_CONST(-1.412510156631470),
1783 COEF_CONST(0), COEF_CONST(-1.412510156631470),
1784 COEF_CONST(-1.407403707504273), COEF_CONST(-1.398906826972961),
1785 COEF_CONST(-1.387039899826050), COEF_CONST(-1.371831417083740),
1786 COEF_CONST(-1.353317975997925), COEF_CONST(-1.331544399261475),
1787 COEF_CONST(-1.306562900543213), COEF_CONST(-1.278433918952942),
1788 COEF_CONST(-1.247225046157837), COEF_CONST(-1.213011384010315),
1789 COEF_CONST(-1.175875544548035), COEF_CONST(-1.135907053947449),
1790 COEF_CONST(-1.093201875686646), COEF_CONST(-1.047863125801086),
// Group 5: indices 160..191 (index 176 is the unused zero slot)
1791 COEF_CONST(-1.000000000000000), COEF_CONST(-0.949727773666382),
1792 COEF_CONST(-0.897167563438416), COEF_CONST(-0.842446029186249),
1793 COEF_CONST(-0.785694956779480), COEF_CONST(-0.727051079273224),
1794 COEF_CONST(-0.666655659675598), COEF_CONST(-0.604654192924500),
1795 COEF_CONST(-0.541196048259735), COEF_CONST(-0.476434230804443),
1796 COEF_CONST(-0.410524487495422), COEF_CONST(-0.343625843524933),
1797 COEF_CONST(-0.275899350643158), COEF_CONST(-0.207508206367493),
1798 COEF_CONST(-0.138617098331451), COEF_CONST(-0.069392144680023),
1799 COEF_CONST(0), COEF_CONST(0.069392263889313),
1800 COEF_CONST(0.138617157936096), COEF_CONST(0.207508206367493),
1801 COEF_CONST(0.275899469852448), COEF_CONST(0.343625962734222),
1802 COEF_CONST(0.410524636507034), COEF_CONST(0.476434201002121),
1803 COEF_CONST(0.541196107864380), COEF_CONST(0.604654192924500),
1804 COEF_CONST(0.666655719280243), COEF_CONST(0.727051138877869),
1805 COEF_CONST(0.785695075988770), COEF_CONST(0.842446029186249),
1806 COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382)
1807 };
1808
/*
** dct4_kernel: modulate / 32-point FFT / modulate core operating on 32
** complex points (the original comment below says "size 64 only").
**
**   in_real, in_imag   - input points, indices 0..31. Both arrays are
**                        overwritten: step 2 stores the modulated values
**                        back and fft_dif then transforms them in place.
**   out_real, out_imag - receive the 32 result points, indices 0..31.
**
** The step numbering starts at 2. NOTE(review): step 1 is presumably done by
** the caller before this function is entered - confirm.
**
** Both modulation steps form each complex product from three real
** multiplications by sharing tmp = (x_re + x_im) * c0 between the real and
** imaginary results.
**
** NOTE(review): step 4 indexes the FFT result through bit_rev_tab, i.e. it
** relies on fft_dif leaving its output in bit-reversed order. If
** REORDER_IN_FFT were defined, fft_dif would additionally call FFTReorder;
** confirm that macro is never set for this code path.
*/
1809 /* size 64 only! */
1810 void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag)
1811 {
1812 // Tables with bit reverse values for 5 bits, bit reverse of i at i-th position
1813 const uint8_t bit_rev_tab[32] = { 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30,1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 };
1814 uint16_t i, i_rev;
1815
1816 /* Step 2: modulate */
1817 // 3*32=96 multiplications
1818 // 3*32=96 additions
// Uses table groups 0 (shared term), 1 (x_re term) and 2 (x_im term).
1819 for (i = 0; i < 32; i++)
1820 {
1821 real_t x_re, x_im, tmp;
1822 x_re = in_real[i];
1823 x_im = in_imag[i];
1824 tmp = MUL_C(x_re + x_im, dct4_64_tab[i]);
1825 in_real[i] = MUL_C(x_im, dct4_64_tab[i + 64]) + tmp;
1826 in_imag[i] = MUL_C(x_re, dct4_64_tab[i + 32]) + tmp;
1827 }
1828
1829 /* Step 3: FFT, but with output in bit reverse order */
1830 fft_dif(in_real, in_imag);
1831
1832 /* Step 4: modulate + bitreverse reordering */
1833 // 3*31+2=95 multiplications
1834 // 3*31+2=95 additions
// Output i is built from FFT slot bit_rev_tab[i], using table groups
// 3 (shared term), 4 (x_re term) and 5 (x_im term). i == 16 is split out
// between the two loops because it needs only two multiplications.
1835 for (i = 0; i < 16; i++)
1836 {
1837 real_t x_re, x_im, tmp;
1838 i_rev = bit_rev_tab[i];
1839 x_re = in_real[i_rev];
1840 x_im = in_imag[i_rev];
1841
1842 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
1843 out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;
1844 out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;
1845 }
1846 // i = 16, i_rev = 1 = rev(16);
// Only the group-3 coefficient is used here: the imaginary output is
// (im - re) * c and the real output is (re + im) * c.
1847 out_imag[16] = MUL_C(in_imag[1] - in_real[1], dct4_64_tab[16 + 3*32]);
1848 out_real[16] = MUL_C(in_real[1] + in_imag[1], dct4_64_tab[16 + 3*32]);
1849 for (i = 17; i < 32; i++)
1850 {
1851 real_t x_re, x_im, tmp;
1852 i_rev = bit_rev_tab[i];
1853 x_re = in_real[i_rev];
1854 x_im = in_imag[i_rev];
1855 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
1856 out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;
1857 out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;
1858 }
1859
1860 }
1861
1862 void DST4_32(real_t *y, real_t *x) 490 void DST4_32(real_t *y, real_t *x)
1863 { 491 {
1864 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9; 492 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9;
1865 real_t f10, f11, f12, f13, f14, f15, f16, f17, f18, f19; 493 real_t f10, f11, f12, f13, f14, f15, f16, f17, f18, f19;
1866 real_t f20, f21, f22, f23, f24, f25, f26, f27, f28, f29; 494 real_t f20, f21, f22, f23, f24, f25, f26, f27, f28, f29;
2264 y[2] = MUL_C(COEF_CONST(4.0846110781292477), f308); 892 y[2] = MUL_C(COEF_CONST(4.0846110781292477), f308);
2265 y[1] = MUL_C(COEF_CONST(6.7967507116736332), f306); 893 y[1] = MUL_C(COEF_CONST(6.7967507116736332), f306);
2266 y[0] = MUL_R(REAL_CONST(20.3738781672314530), f304); 894 y[0] = MUL_R(REAL_CONST(20.3738781672314530), f304);
2267 } 895 }
2268 896
897 #ifdef SBR_LOW_POWER
898
/*
** DCT2_16_unscaled: 16-point transform written as a straight-line butterfly
** network (per its name, a DCT-II without normalisation). It sits after the
** "#ifdef SBR_LOW_POWER" line.
**
**   y - output, indices 0..15 are written
**   x - input,  indices 0..15 are read
**
** All reads of x[] happen in the first sixteen statements, before the first
** write to y[]. y[0] is the plain sum of the 16 inputs. Even-indexed outputs
** are derived from the pair sums x[k] + x[15-k], odd-indexed outputs from
** the pair differences x[k] - x[15-k].
*/
899 void DCT2_16_unscaled(real_t *y, real_t *x)
900 {
901 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
902 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
903 real_t f21, f22, f23, f24, f25, f26, f27, f28, f31, f32;
904 real_t f33, f34, f37, f38, f39, f40, f41, f42, f43, f44;
905 real_t f45, f46, f47, f48, f49, f51, f53, f54, f57, f58;
906 real_t f59, f60, f61, f62, f63, f64, f65, f66, f67, f68;
907 real_t f69, f70, f71, f72, f73, f74, f75, f76, f77, f78;
908 real_t f79, f80, f81, f82, f83, f84, f85, f86, f87, f88;
909 real_t f89, f90, f91, f92, f95, f96, f97, f98, f101, f102;
910 real_t f103, f104, f107, f108, f109, f110;
911
// Input butterflies: even-numbered f = x[k] - x[15-k],
// odd-numbered f = x[k] + x[15-k].
912 f0 = x[0] - x[15];
913 f1 = x[0] + x[15];
914 f2 = x[1] - x[14];
915 f3 = x[1] + x[14];
916 f4 = x[2] - x[13];
917 f5 = x[2] + x[13];
918 f6 = x[3] - x[12];
919 f7 = x[3] + x[12];
920 f8 = x[4] - x[11];
921 f9 = x[4] + x[11];
922 f10 = x[5] - x[10];
923 f11 = x[5] + x[10];
924 f12 = x[6] - x[9];
925 f13 = x[6] + x[9];
926 f14 = x[7] - x[8];
927 f15 = x[7] + x[8];
// Even-indexed outputs, computed from the pair sums only.
928 f16 = f1 - f15;
929 f17 = f1 + f15;
930 f18 = f3 - f13;
931 f19 = f3 + f13;
932 f20 = f5 - f11;
933 f21 = f5 + f11;
934 f22 = f7 - f9;
935 f23 = f7 + f9;
936 f24 = f17 - f23;
937 f25 = f17 + f23;
938 f26 = f19 - f21;
939 f27 = f19 + f21;
940 f28 = f25 - f27;
941 y[0] = f25 + f27;
942 y[8] = MUL_F(f28, FRAC_CONST(0.7071067811865476));
943 f31 = f24 + f26;
944 f32 = MUL_C(f24, COEF_CONST(1.3065629648763766));
945 f33 = MUL_F(f31, FRAC_CONST(-0.9238795325112866));
946 f34 = MUL_F(f26, FRAC_CONST(-0.5411961001461967));
947 y[12] = f32 + f33;
948 y[4] = f34 - f33;
949 f37 = f16 + f22;
950 f38 = MUL_C(f16, COEF_CONST(1.1758756024193588));
951 f39 = MUL_F(f37, FRAC_CONST(-0.9807852804032304));
952 f40 = MUL_F(f22, FRAC_CONST(-0.7856949583871021));
953 f41 = f38 + f39;
954 f42 = f40 - f39;
955 f43 = f18 + f20;
956 f44 = MUL_C(f18, COEF_CONST(1.3870398453221473));
957 f45 = MUL_F(f43, FRAC_CONST(-0.8314696123025455));
958 f46 = MUL_F(f20, FRAC_CONST(-0.2758993792829436));
959 f47 = f44 + f45;
960 f48 = f46 - f45;
961 f49 = f42 - f48;
962 y[2] = f42 + f48;
963 f51 = MUL_F(f49, FRAC_CONST(0.7071067811865476));
964 y[14] = f41 - f47;
965 f53 = f41 + f47;
966 f54 = MUL_F(f53, FRAC_CONST(0.7071067811865476));
967 y[10] = f51 - f54;
968 y[6] = f51 + f54;
// Odd-indexed outputs, computed from the pair differences only.
969 f57 = f2 - f4;
970 f58 = f2 + f4;
971 f59 = f6 - f8;
972 f60 = f6 + f8;
973 f61 = f10 - f12;
974 f62 = f10 + f12;
975 f63 = MUL_F(f60, FRAC_CONST(0.7071067811865476));
976 f64 = f0 - f63;
977 f65 = f0 + f63;
978 f66 = f58 + f62;
979 f67 = MUL_C(f58, COEF_CONST(1.3065629648763766));
980 f68 = MUL_F(f66, FRAC_CONST(-0.9238795325112866));
981 f69 = MUL_F(f62, FRAC_CONST(-0.5411961001461967));
982 f70 = f67 + f68;
983 f71 = f69 - f68;
984 f72 = f65 - f71;
985 f73 = f65 + f71;
986 f74 = f64 - f70;
987 f75 = f64 + f70;
988 f76 = MUL_F(f59, FRAC_CONST(0.7071067811865476));
989 f77 = f14 - f76;
990 f78 = f14 + f76;
991 f79 = f61 + f57;
992 f80 = MUL_C(f61, COEF_CONST(1.3065629648763766));
993 f81 = MUL_F(f79, FRAC_CONST(-0.9238795325112866));
994 f82 = MUL_F(f57, FRAC_CONST(-0.5411961001461967));
995 f83 = f80 + f81;
996 f84 = f82 - f81;
997 f85 = f78 - f84;
998 f86 = f78 + f84;
999 f87 = f77 - f83;
1000 f88 = f77 + f83;
// Final stage: each group of three multiplications below yields one output
// pair y[k], y[16-k] for k = 1, 3, 5, 7.
1001 f89 = f86 + f73;
1002 f90 = MUL_F(f86, FRAC_CONST(-0.8971675863426361));
1003 f91 = MUL_F(f89, FRAC_CONST(0.9951847266721968));
1004 f92 = MUL_C(f73, COEF_CONST(1.0932018670017576));
1005 y[1] = f90 + f91;
1006 y[15] = f92 - f91;
1007 f95 = f75 - f88;
1008 f96 = MUL_F(f88, FRAC_CONST(-0.6666556584777466));
1009 f97 = MUL_F(f95, FRAC_CONST(0.9569403357322089));
1010 f98 = MUL_C(f75, COEF_CONST(1.2472250129866713));
1011 y[3] = f97 - f96;
1012 y[13] = f98 - f97;
1013 f101 = f87 + f74;
1014 f102 = MUL_F(f87, FRAC_CONST(-0.4105245275223571));
1015 f103 = MUL_F(f101, FRAC_CONST(0.8819212643483549));
1016 f104 = MUL_C(f74, COEF_CONST(1.3533180011743529));
1017 y[5] = f102 + f103;
1018 y[11] = f104 - f103;
1019 f107 = f72 - f85;
1020 f108 = MUL_F(f85, FRAC_CONST(-0.1386171691990915));
1021 f109 = MUL_F(f107, FRAC_CONST(0.7730104533627370));
1022 f110 = MUL_C(f72, COEF_CONST(1.4074037375263826));
1023 y[7] = f109 - f108;
1024 y[9] = f110 - f109;
1025 }
1026
/*
** DCT4_16: 16-point transform written as a straight-line butterfly network
** (per its name, a DCT-IV). It sits after the "#ifdef SBR_LOW_POWER" line.
**
**   y - output, indices 0..15 are written
**   x - input,  indices 0..15 are read
**
** All reads of x[] happen while computing f0..f47, before the first write
** to y[].
**
** The recurring building block takes a pair (a, b) and produces two linear
** combinations of it with three multiplications:
**     s = a + b;  p = C1*a;  q = C2*s;  r = C3*b;  out1 = p + q;  out2 = r - q;
** MUL_C / COEF_CONST is used where the constant's magnitude exceeds 1 and
** MUL_F / FRAC_CONST otherwise.
*/
1027 void DCT4_16(real_t *y, real_t *x)
1028 {
1029 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
1030 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
1031 real_t f21, f22, f23, f24, f25, f26, f27, f28, f29, f30;
1032 real_t f31, f32, f33, f34, f35, f36, f37, f38, f39, f40;
1033 real_t f41, f42, f43, f44, f45, f46, f47, f48, f49, f50;
1034 real_t f51, f52, f53, f54, f55, f56, f57, f58, f59, f60;
1035 real_t f61, f62, f63, f64, f65, f66, f67, f68, f69, f70;
1036 real_t f71, f72, f73, f74, f75, f76, f77, f78, f79, f80;
1037 real_t f81, f82, f83, f84, f85, f86, f87, f88, f89, f90;
1038 real_t f91, f92, f93, f94, f95, f96, f97, f98, f99, f100;
1039 real_t f101, f102, f103, f104, f105, f106, f107, f108, f109, f110;
1040 real_t f111, f112, f113, f114, f115, f116, f117, f118, f119, f120;
1041 real_t f121, f122, f123, f124, f125, f126, f127, f128, f130, f132;
1042 real_t f134, f136, f138, f140, f142, f144, f145, f148, f149, f152;
1043 real_t f153, f156, f157;
1044
// Stage 1: eight three-multiplication blocks, each combining an
// even-indexed input x[k] with its mirror x[15-k].
1045 f0 = x[0] + x[15];
1046 f1 = MUL_C(COEF_CONST(1.0478631305325901), x[0]);
1047 f2 = MUL_F(FRAC_CONST(-0.9987954562051724), f0);
1048 f3 = MUL_F(FRAC_CONST(-0.9497277818777548), x[15]);
1049 f4 = f1 + f2;
1050 f5 = f3 - f2;
1051 f6 = x[2] + x[13];
1052 f7 = MUL_C(COEF_CONST(1.2130114330978077), x[2]);
1053 f8 = MUL_F(FRAC_CONST(-0.9700312531945440), f6);
1054 f9 = MUL_F(FRAC_CONST(-0.7270510732912803), x[13]);
1055 f10 = f7 + f8;
1056 f11 = f9 - f8;
1057 f12 = x[4] + x[11];
1058 f13 = MUL_C(COEF_CONST(1.3315443865537255), x[4]);
1059 f14 = MUL_F(FRAC_CONST(-0.9039892931234433), f12);
1060 f15 = MUL_F(FRAC_CONST(-0.4764341996931612), x[11]);
1061 f16 = f13 + f14;
1062 f17 = f15 - f14;
1063 f18 = x[6] + x[9];
1064 f19 = MUL_C(COEF_CONST(1.3989068359730781), x[6]);
1065 f20 = MUL_F(FRAC_CONST(-0.8032075314806453), f18);
1066 f21 = MUL_F(FRAC_CONST(-0.2075082269882124), x[9]);
1067 f22 = f19 + f20;
1068 f23 = f21 - f20;
1069 f24 = x[8] + x[7];
1070 f25 = MUL_C(COEF_CONST(1.4125100802019777), x[8]);
1071 f26 = MUL_F(FRAC_CONST(-0.6715589548470187), f24);
1072 f27 = MUL_F(FRAC_CONST(0.0693921705079402), x[7]);
1073 f28 = f25 + f26;
1074 f29 = f27 - f26;
1075 f30 = x[10] + x[5];
1076 f31 = MUL_C(COEF_CONST(1.3718313541934939), x[10]);
1077 f32 = MUL_F(FRAC_CONST(-0.5141027441932219), f30);
1078 f33 = MUL_F(FRAC_CONST(0.3436258658070501), x[5]);
1079 f34 = f31 + f32;
1080 f35 = f33 - f32;
1081 f36 = x[12] + x[3];
1082 f37 = MUL_C(COEF_CONST(1.2784339185752409), x[12]);
1083 f38 = MUL_F(FRAC_CONST(-0.3368898533922200), f36);
1084 f39 = MUL_F(FRAC_CONST(0.6046542117908008), x[3]);
1085 f40 = f37 + f38;
1086 f41 = f39 - f38;
1087 f42 = x[14] + x[1];
1088 f43 = MUL_C(COEF_CONST(1.1359069844201433), x[14]);
1089 f44 = MUL_F(FRAC_CONST(-0.1467304744553624), f42);
1090 f45 = MUL_F(FRAC_CONST(0.8424460355094185), x[1]);
1091 f46 = f43 + f44;
1092 f47 = f45 - f44;
// Stage 2: sum/difference butterflies between the first four and the last
// four stage-1 results.
1093 f48 = f5 - f29;
1094 f49 = f5 + f29;
1095 f50 = f4 - f28;
1096 f51 = f4 + f28;
1097 f52 = f11 - f35;
1098 f53 = f11 + f35;
1099 f54 = f10 - f34;
1100 f55 = f10 + f34;
1101 f56 = f17 - f41;
1102 f57 = f17 + f41;
1103 f58 = f16 - f40;
1104 f59 = f16 + f40;
1105 f60 = f23 - f47;
1106 f61 = f23 + f47;
1107 f62 = f22 - f46;
1108 f63 = f22 + f46;
// Stage 3: three-multiplication blocks applied to the difference terms.
1109 f64 = f48 + f50;
1110 f65 = MUL_C(COEF_CONST(1.1758756024193588), f48);
1111 f66 = MUL_F(FRAC_CONST(-0.9807852804032304), f64);
1112 f67 = MUL_F(FRAC_CONST(-0.7856949583871021), f50);
1113 f68 = f65 + f66;
1114 f69 = f67 - f66;
1115 f70 = f52 + f54;
1116 f71 = MUL_C(COEF_CONST(1.3870398453221475), f52);
1117 f72 = MUL_F(FRAC_CONST(-0.5555702330196022), f70);
1118 f73 = MUL_F(FRAC_CONST(0.2758993792829431), f54);
1119 f74 = f71 + f72;
1120 f75 = f73 - f72;
1121 f76 = f56 + f58;
1122 f77 = MUL_F(FRAC_CONST(0.7856949583871022), f56);
1123 f78 = MUL_F(FRAC_CONST(0.1950903220161283), f76);
1124 f79 = MUL_C(COEF_CONST(1.1758756024193586), f58);
1125 f80 = f77 + f78;
1126 f81 = f79 - f78;
1127 f82 = f60 + f62;
1128 f83 = MUL_F(FRAC_CONST(-0.2758993792829430), f60);
1129 f84 = MUL_F(FRAC_CONST(0.8314696123025452), f82);
1130 f85 = MUL_C(COEF_CONST(1.3870398453221475), f62);
1131 f86 = f83 + f84;
1132 f87 = f85 - f84;
// Stage 4: sum/difference butterflies over the stage-2 sums and the
// stage-3 results.
1133 f88 = f49 - f57;
1134 f89 = f49 + f57;
1135 f90 = f51 - f59;
1136 f91 = f51 + f59;
1137 f92 = f53 - f61;
1138 f93 = f53 + f61;
1139 f94 = f55 - f63;
1140 f95 = f55 + f63;
1141 f96 = f69 - f81;
1142 f97 = f69 + f81;
1143 f98 = f68 - f80;
1144 f99 = f68 + f80;
1145 f100 = f75 - f87;
1146 f101 = f75 + f87;
1147 f102 = f74 - f86;
1148 f103 = f74 + f86;
// Stage 5: three-multiplication blocks applied to the stage-4 differences.
1149 f104 = f88 + f90;
1150 f105 = MUL_C(COEF_CONST(1.3065629648763766), f88);
1151 f106 = MUL_F(FRAC_CONST(-0.9238795325112866), f104);
1152 f107 = MUL_F(FRAC_CONST(-0.5411961001461967), f90);
1153 f108 = f105 + f106;
1154 f109 = f107 - f106;
1155 f110 = f92 + f94;
1156 f111 = MUL_F(FRAC_CONST(0.5411961001461969), f92);
1157 f112 = MUL_F(FRAC_CONST(0.3826834323650898), f110);
1158 f113 = MUL_C(COEF_CONST(1.3065629648763766), f94);
1159 f114 = f111 + f112;
1160 f115 = f113 - f112;
1161 f116 = f96 + f98;
1162 f117 = MUL_C(COEF_CONST(1.3065629648763766), f96);
1163 f118 = MUL_F(FRAC_CONST(-0.9238795325112866), f116);
1164 f119 = MUL_F(FRAC_CONST(-0.5411961001461967), f98);
1165 f120 = f117 + f118;
1166 f121 = f119 - f118;
1167 f122 = f100 + f102;
1168 f123 = MUL_F(FRAC_CONST(0.5411961001461969), f100);
1169 f124 = MUL_F(FRAC_CONST(0.3826834323650898), f122);
1170 f125 = MUL_C(COEF_CONST(1.3065629648763766), f102);
1171 f126 = f123 + f124;
1172 f127 = f125 - f124;
// Output stage: each sum is stored directly into y[0..3] / y[12..15]; the
// matching difference is kept for the scaled outputs that follow.
1173 f128 = f89 - f93;
1174 y[0] = f89 + f93;
1175 f130 = f91 - f95;
1176 y[15] = f91 + f95;
1177 f132 = f109 - f115;
1178 y[3] = f109 + f115;
1179 f134 = f108 - f114;
1180 y[12] = f108 + f114;
1181 f136 = f97 - f101;
1182 y[1] = f97 + f101;
1183 f138 = f99 - f103;
1184 y[14] = f99 + f103;
1185 f140 = f121 - f127;
1186 y[2] = f121 + f127;
1187 f142 = f120 - f126;
1188 y[13] = f120 + f126;
// Remaining outputs y[4..11]: sum/difference of the kept terms, each scaled
// by 0.7071067811865474.
1189 f144 = f128 - f130;
1190 f145 = f128 + f130;
1191 y[8] = MUL_F(FRAC_CONST(0.7071067811865474), f144);
1192 y[7] = MUL_F(FRAC_CONST(0.7071067811865474), f145);
1193 f148 = f132 - f134;
1194 f149 = f132 + f134;
1195 y[11] = MUL_F(FRAC_CONST(0.7071067811865474), f148);
1196 y[4] = MUL_F(FRAC_CONST(0.7071067811865474), f149);
1197 f152 = f136 - f138;
1198 f153 = f136 + f138;
1199 y[9] = MUL_F(FRAC_CONST(0.7071067811865474), f152);
1200 y[6] = MUL_F(FRAC_CONST(0.7071067811865474), f153);
1201 f156 = f140 - f142;
1202 f157 = f140 + f142;
1203 y[10] = MUL_F(FRAC_CONST(0.7071067811865474), f156);
1204 y[5] = MUL_F(FRAC_CONST(0.7071067811865474), f157);
1205 }
1206
/*
 * DCT3_32_unscaled: fully unrolled 32-point transform.
 *
 * x: input, elements x[0]..x[31] are all read.
 * y: output, elements y[0]..y[31] are all written.
 *
 * Every read of x happens before the first store to y, and both arrays are
 * only ever indexed with constants.  Going by the name this is a type-III
 * DCT without output scaling; the exact normalisation cannot be confirmed
 * from this function alone -- check the caller.
 *
 * All multiplications are by compile-time constants.  In this function
 * constants with magnitude below 1 are wrapped in FRAC_CONST and applied
 * with MUL_F, constants above 1 are wrapped in COEF_CONST and applied with
 * MUL_C.
 *
 * A recurring pattern (first seen at f3..f8) takes two values a and b,
 * forms s = a + b and produces
 *     c1*a + c2*s   and   c3*b - c2*s
 * i.e. a two-input linear combination using three multiplies.
 */
1207 void DCT3_32_unscaled(real_t *y, real_t *x)
1208 {
1209 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
1210 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
1211 real_t f21, f22, f23, f24, f25, f26, f27, f28, f29, f30;
1212 real_t f31, f32, f33, f34, f35, f36, f37, f38, f39, f40;
1213 real_t f41, f42, f43, f44, f45, f46, f47, f48, f49, f50;
1214 real_t f51, f52, f53, f54, f55, f56, f57, f58, f59, f60;
1215 real_t f61, f62, f63, f64, f65, f66, f67, f68, f69, f70;
1216 real_t f71, f72, f73, f74, f75, f76, f77, f78, f79, f80;
1217 real_t f81, f82, f83, f84, f85, f86, f87, f88, f89, f90;
1218 real_t f91, f92, f93, f94, f95, f96, f97, f98, f99, f100;
1219 real_t f101, f102, f103, f104, f105, f106, f107, f108, f109, f110;
1220 real_t f111, f112, f113, f114, f115, f116, f117, f118, f119, f120;
1221 real_t f121, f122, f123, f124, f125, f126, f127, f128, f129, f130;
1222 real_t f131, f132, f133, f134, f135, f136, f137, f138, f139, f140;
1223 real_t f141, f142, f143, f144, f145, f146, f147, f148, f149, f150;
1224 real_t f151, f152, f153, f154, f155, f156, f157, f158, f159, f160;
1225 real_t f161, f162, f163, f164, f165, f166, f167, f168, f169, f170;
1226 real_t f171, f172, f173, f174, f175, f176, f177, f178, f179, f180;
1227 real_t f181, f182, f183, f184, f185, f186, f187, f188, f189, f190;
1228 real_t f191, f192, f193, f194, f195, f196, f197, f198, f199, f200;
1229 real_t f201, f202, f203, f204, f205, f206, f207, f208, f209, f210;
1230 real_t f211, f212, f213, f214, f215, f216, f217, f218, f219, f220;
1231 real_t f221, f222, f223, f224, f225, f226, f227, f228, f229, f230;
1232 real_t f231, f232, f233, f234, f235, f236, f237, f238, f239, f240;
1233 real_t f241, f242, f243, f244, f245, f246, f247, f248, f249, f250;
1234 real_t f251, f252, f253, f254, f255, f256, f257, f258, f259, f260;
1235 real_t f261, f262, f263, f264, f265, f266, f267, f268, f269, f270;
1236 real_t f271, f272;
1237
/* Inputs x[0], x[16], x[8], x[24]; the group's results are f9..f12. */
1238 f0 = MUL_F(x[16], FRAC_CONST(0.7071067811865476));
1239 f1 = x[0] - f0;
1240 f2 = x[0] + f0;
1241 f3 = x[8] + x[24];
1242 f4 = MUL_C(x[8], COEF_CONST(1.3065629648763766));
1243 f5 = MUL_F(f3, FRAC_CONST((-0.9238795325112866)));
1244 f6 = MUL_F(x[24], FRAC_CONST((-0.5411961001461967)));
1245 f7 = f4 + f5;
1246 f8 = f6 - f5;
1247 f9 = f2 - f8;
1248 f10 = f2 + f8;
1249 f11 = f1 - f7;
1250 f12 = f1 + f7;
/* Inputs x[4], x[28], x[12], x[20]; later stages consume f26, f28, f31, f32. */
1251 f13 = x[4] + x[28];
1252 f14 = MUL_C(x[4], COEF_CONST(1.1758756024193588));
1253 f15 = MUL_F(f13, FRAC_CONST((-0.9807852804032304)));
1254 f16 = MUL_F(x[28], FRAC_CONST((-0.7856949583871021)));
1255 f17 = f14 + f15;
1256 f18 = f16 - f15;
1257 f19 = x[12] + x[20];
1258 f20 = MUL_C(x[12], COEF_CONST(1.3870398453221473));
1259 f21 = MUL_F(f19, FRAC_CONST((-0.8314696123025455)));
1260 f22 = MUL_F(x[20], FRAC_CONST((-0.2758993792829436)));
1261 f23 = f20 + f21;
1262 f24 = f22 - f21;
1263 f25 = f18 - f24;
1264 f26 = f18 + f24;
1265 f27 = MUL_F(f25, FRAC_CONST(0.7071067811865476));
1266 f28 = f17 - f23;
1267 f29 = f17 + f23;
1268 f30 = MUL_F(f29, FRAC_CONST(0.7071067811865476));
1269 f31 = f27 - f30;
1270 f32 = f27 + f30;
/* Merge the two groups above with add/subtract pairs. */
1271 f33 = f10 - f26;
1272 f34 = f10 + f26;
1273 f35 = f12 - f32;
1274 f36 = f12 + f32;
1275 f37 = f11 - f31;
1276 f38 = f11 + f31;
1277 f39 = f9 - f28;
1278 f40 = f9 + f28;
/* Inputs x[2], x[30], x[6], x[26], x[10], x[22], x[14], x[18]. */
1279 f41 = x[2] + x[30];
1280 f42 = MUL_C(x[2], COEF_CONST(1.0932018670017569));
1281 f43 = MUL_F(f41, FRAC_CONST((-0.9951847266721969)));
1282 f44 = MUL_F(x[30], FRAC_CONST((-0.8971675863426368)));
1283 f45 = f42 + f43;
1284 f46 = f44 - f43;
1285 f47 = x[6] + x[26];
1286 f48 = MUL_C(x[6], COEF_CONST(1.2472250129866711));
1287 f49 = MUL_F(f47, FRAC_CONST((-0.9569403357322089)));
1288 f50 = MUL_F(x[26], FRAC_CONST((-0.6666556584777469)));
1289 f51 = f48 + f49;
1290 f52 = f50 - f49;
1291 f53 = x[10] + x[22];
1292 f54 = MUL_C(x[10], COEF_CONST(1.3533180011743526));
1293 f55 = MUL_F(f53, FRAC_CONST((-0.8819212643483551)));
1294 f56 = MUL_F(x[22], FRAC_CONST((-0.4105245275223575)));
1295 f57 = f54 + f55;
1296 f58 = f56 - f55;
1297 f59 = x[14] + x[18];
1298 f60 = MUL_C(x[14], COEF_CONST(1.4074037375263826));
1299 f61 = MUL_F(f59, FRAC_CONST((-0.7730104533627369)));
1300 f62 = MUL_F(x[18], FRAC_CONST((-0.1386171691990913)));
1301 f63 = f60 + f61;
1302 f64 = f62 - f61;
1303 f65 = f46 - f64;
1304 f66 = f46 + f64;
1305 f67 = f52 - f58;
1306 f68 = f52 + f58;
1307 f69 = f66 - f68;
1308 f70 = f66 + f68;
1309 f71 = MUL_F(f69, FRAC_CONST(0.7071067811865476));
1310 f72 = f65 + f67;
1311 f73 = MUL_C(f65, COEF_CONST(1.3065629648763766));
1312 f74 = MUL_F(f72, FRAC_CONST((-0.9238795325112866)));
1313 f75 = MUL_F(f67, FRAC_CONST((-0.5411961001461967)));
1314 f76 = f73 + f74;
1315 f77 = f75 - f74;
1316 f78 = f45 - f63;
1317 f79 = f45 + f63;
1318 f80 = f51 - f57;
1319 f81 = f51 + f57;
1320 f82 = f79 + f81;
1321 f83 = MUL_C(f79, COEF_CONST(1.3065629648763770));
1322 f84 = MUL_F(f82, FRAC_CONST((-0.3826834323650904)));
1323 f85 = MUL_F(f81, FRAC_CONST(0.5411961001461961));
1324 f86 = f83 + f84;
1325 f87 = f85 - f84;
1326 f88 = f78 - f80;
1327 f89 = f78 + f80;
1328 f90 = MUL_F(f89, FRAC_CONST(0.7071067811865476));
1329 f91 = f77 - f87;
1330 f92 = f77 + f87;
1331 f93 = f71 - f90;
1332 f94 = f71 + f90;
1333 f95 = f76 - f86;
1334 f96 = f76 + f86;
/*
 * f97..f112: everything derived from the even-indexed inputs, combined by
 * add/subtract pairs.  These sixteen values are the first operands of the
 * output stage at the end of the function.
 */
1335 f97 = f34 - f70;
1336 f98 = f34 + f70;
1337 f99 = f36 - f92;
1338 f100 = f36 + f92;
1339 f101 = f38 - f91;
1340 f102 = f38 + f91;
1341 f103 = f40 - f94;
1342 f104 = f40 + f94;
1343 f105 = f39 - f93;
1344 f106 = f39 + f93;
1345 f107 = f37 - f96;
1346 f108 = f37 + f96;
1347 f109 = f35 - f95;
1348 f110 = f35 + f95;
1349 f111 = f33 - f88;
1350 f112 = f33 + f88;
/*
 * Odd-indexed inputs, taken in pairs x[k], x[32-k] and run through the
 * three-multiply pattern with a different constant triple per pair.
 */
1351 f113 = x[1] + x[31];
1352 f114 = MUL_C(x[1], COEF_CONST(1.0478631305325901));
1353 f115 = MUL_F(f113, FRAC_CONST((-0.9987954562051724)));
1354 f116 = MUL_F(x[31], FRAC_CONST((-0.9497277818777548)));
1355 f117 = f114 + f115;
1356 f118 = f116 - f115;
1357 f119 = x[5] + x[27];
1358 f120 = MUL_C(x[5], COEF_CONST(1.2130114330978077));
1359 f121 = MUL_F(f119, FRAC_CONST((-0.9700312531945440)));
1360 f122 = MUL_F(x[27], FRAC_CONST((-0.7270510732912803)));
1361 f123 = f120 + f121;
1362 f124 = f122 - f121;
1363 f125 = x[9] + x[23];
1364 f126 = MUL_C(x[9], COEF_CONST(1.3315443865537255));
1365 f127 = MUL_F(f125, FRAC_CONST((-0.9039892931234433)));
1366 f128 = MUL_F(x[23], FRAC_CONST((-0.4764341996931612)));
1367 f129 = f126 + f127;
1368 f130 = f128 - f127;
1369 f131 = x[13] + x[19];
1370 f132 = MUL_C(x[13], COEF_CONST(1.3989068359730781));
1371 f133 = MUL_F(f131, FRAC_CONST((-0.8032075314806453)));
1372 f134 = MUL_F(x[19], FRAC_CONST((-0.2075082269882124)));
1373 f135 = f132 + f133;
1374 f136 = f134 - f133;
1375 f137 = x[17] + x[15];
1376 f138 = MUL_C(x[17], COEF_CONST(1.4125100802019777));
1377 f139 = MUL_F(f137, FRAC_CONST((-0.6715589548470187)));
1378 f140 = MUL_F(x[15], FRAC_CONST(0.0693921705079402));
1379 f141 = f138 + f139;
1380 f142 = f140 - f139;
1381 f143 = x[21] + x[11];
1382 f144 = MUL_C(x[21], COEF_CONST(1.3718313541934939));
1383 f145 = MUL_F(f143, FRAC_CONST((-0.5141027441932219)));
1384 f146 = MUL_F(x[11], FRAC_CONST(0.3436258658070501));
1385 f147 = f144 + f145;
1386 f148 = f146 - f145;
1387 f149 = x[25] + x[7];
1388 f150 = MUL_C(x[25], COEF_CONST(1.2784339185752409));
1389 f151 = MUL_F(f149, FRAC_CONST((-0.3368898533922200)));
1390 f152 = MUL_F(x[7], FRAC_CONST(0.6046542117908008));
1391 f153 = f150 + f151;
1392 f154 = f152 - f151;
1393 f155 = x[29] + x[3];
1394 f156 = MUL_C(x[29], COEF_CONST(1.1359069844201433));
1395 f157 = MUL_F(f155, FRAC_CONST((-0.1467304744553624)));
1396 f158 = MUL_F(x[3], FRAC_CONST(0.8424460355094185));
1397 f159 = f156 + f157;
1398 f160 = f158 - f157;
/* Add/subtract stage across the eight pair results. */
1399 f161 = f118 - f142;
1400 f162 = f118 + f142;
1401 f163 = f117 - f141;
1402 f164 = f117 + f141;
1403 f165 = f124 - f148;
1404 f166 = f124 + f148;
1405 f167 = f123 - f147;
1406 f168 = f123 + f147;
1407 f169 = f130 - f154;
1408 f170 = f130 + f154;
1409 f171 = f129 - f153;
1410 f172 = f129 + f153;
1411 f173 = f136 - f160;
1412 f174 = f136 + f160;
1413 f175 = f135 - f159;
1414 f176 = f135 + f159;
/* Three-multiply combinations applied to the differences f161..f175. */
1415 f177 = f161 + f163;
1416 f178 = MUL_C(f161, COEF_CONST(1.1758756024193588));
1417 f179 = MUL_F(f177, FRAC_CONST((-0.9807852804032304)));
1418 f180 = MUL_F(f163, FRAC_CONST((-0.7856949583871021)));
1419 f181 = f178 + f179;
1420 f182 = f180 - f179;
1421 f183 = f165 + f167;
1422 f184 = MUL_C(f165, COEF_CONST(1.3870398453221475));
1423 f185 = MUL_F(f183, FRAC_CONST((-0.5555702330196022)));
1424 f186 = MUL_F(f167, FRAC_CONST(0.2758993792829431));
1425 f187 = f184 + f185;
1426 f188 = f186 - f185;
1427 f189 = f169 + f171;
1428 f190 = MUL_F(f169, FRAC_CONST(0.7856949583871022));
1429 f191 = MUL_F(f189, FRAC_CONST(0.1950903220161283));
1430 f192 = MUL_C(f171, COEF_CONST(1.1758756024193586));
1431 f193 = f190 + f191;
1432 f194 = f192 - f191;
1433 f195 = f173 + f175;
1434 f196 = MUL_F(f173, FRAC_CONST((-0.2758993792829430)));
1435 f197 = MUL_F(f195, FRAC_CONST(0.8314696123025452));
1436 f198 = MUL_C(f175, COEF_CONST(1.3870398453221475));
1437 f199 = f196 + f197;
1438 f200 = f198 - f197;
/* Second add/subtract stage. */
1439 f201 = f162 - f170;
1440 f202 = f162 + f170;
1441 f203 = f164 - f172;
1442 f204 = f164 + f172;
1443 f205 = f166 - f174;
1444 f206 = f166 + f174;
1445 f207 = f168 - f176;
1446 f208 = f168 + f176;
1447 f209 = f182 - f194;
1448 f210 = f182 + f194;
1449 f211 = f181 - f193;
1450 f212 = f181 + f193;
1451 f213 = f188 - f200;
1452 f214 = f188 + f200;
1453 f215 = f187 - f199;
1454 f216 = f187 + f199;
/* Three-multiply combinations on the differences produced just above. */
1455 f217 = f201 + f203;
1456 f218 = MUL_C(f201, COEF_CONST(1.3065629648763766));
1457 f219 = MUL_F(f217, FRAC_CONST((-0.9238795325112866)));
1458 f220 = MUL_F(f203, FRAC_CONST((-0.5411961001461967)));
1459 f221 = f218 + f219;
1460 f222 = f220 - f219;
1461 f223 = f205 + f207;
1462 f224 = MUL_F(f205, FRAC_CONST(0.5411961001461969));
1463 f225 = MUL_F(f223, FRAC_CONST(0.3826834323650898));
1464 f226 = MUL_C(f207, COEF_CONST(1.3065629648763766));
1465 f227 = f224 + f225;
1466 f228 = f226 - f225;
1467 f229 = f209 + f211;
1468 f230 = MUL_C(f209, COEF_CONST(1.3065629648763766));
1469 f231 = MUL_F(f229, FRAC_CONST((-0.9238795325112866)));
1470 f232 = MUL_F(f211, FRAC_CONST((-0.5411961001461967)));
1471 f233 = f230 + f231;
1472 f234 = f232 - f231;
1473 f235 = f213 + f215;
1474 f236 = MUL_F(f213, FRAC_CONST(0.5411961001461969));
1475 f237 = MUL_F(f235, FRAC_CONST(0.3826834323650898));
1476 f238 = MUL_C(f215, COEF_CONST(1.3065629648763766));
1477 f239 = f236 + f237;
1478 f240 = f238 - f237;
/* Third add/subtract stage; the sums here feed the output stage directly. */
1479 f241 = f202 - f206;
1480 f242 = f202 + f206;
1481 f243 = f204 - f208;
1482 f244 = f204 + f208;
1483 f245 = f222 - f228;
1484 f246 = f222 + f228;
1485 f247 = f221 - f227;
1486 f248 = f221 + f227;
1487 f249 = f210 - f214;
1488 f250 = f210 + f214;
1489 f251 = f212 - f216;
1490 f252 = f212 + f216;
1491 f253 = f234 - f240;
1492 f254 = f234 + f240;
1493 f255 = f233 - f239;
1494 f256 = f233 + f239;
/* Remaining differences: add/subtract once more, then scale by 0.7071... */
1495 f257 = f241 - f243;
1496 f258 = f241 + f243;
1497 f259 = MUL_F(f257, FRAC_CONST(0.7071067811865474));
1498 f260 = MUL_F(f258, FRAC_CONST(0.7071067811865474));
1499 f261 = f245 - f247;
1500 f262 = f245 + f247;
1501 f263 = MUL_F(f261, FRAC_CONST(0.7071067811865474));
1502 f264 = MUL_F(f262, FRAC_CONST(0.7071067811865474));
1503 f265 = f249 - f251;
1504 f266 = f249 + f251;
1505 f267 = MUL_F(f265, FRAC_CONST(0.7071067811865474));
1506 f268 = MUL_F(f266, FRAC_CONST(0.7071067811865474));
1507 f269 = f253 - f255;
1508 f270 = f253 + f255;
1509 f271 = MUL_F(f269, FRAC_CONST(0.7071067811865474));
1510 f272 = MUL_F(f270, FRAC_CONST(0.7071067811865474));
/*
 * Output stage: for each k in 0..15, y[k] is the sum and y[31-k] the
 * difference of one value from f97..f112 (even-indexed inputs) and one
 * value from the odd-indexed chain above.
 */
1511 y[31] = f98 - f242;
1512 y[0] = f98 + f242;
1513 y[30] = f100 - f250;
1514 y[1] = f100 + f250;
1515 y[29] = f102 - f254;
1516 y[2] = f102 + f254;
1517 y[28] = f104 - f246;
1518 y[3] = f104 + f246;
1519 y[27] = f106 - f264;
1520 y[4] = f106 + f264;
1521 y[26] = f108 - f272;
1522 y[5] = f108 + f272;
1523 y[25] = f110 - f268;
1524 y[6] = f110 + f268;
1525 y[24] = f112 - f260;
1526 y[7] = f112 + f260;
1527 y[23] = f111 - f259;
1528 y[8] = f111 + f259;
1529 y[22] = f109 - f267;
1530 y[9] = f109 + f267;
1531 y[21] = f107 - f271;
1532 y[10] = f107 + f271;
1533 y[20] = f105 - f263;
1534 y[11] = f105 + f263;
1535 y[19] = f103 - f248;
1536 y[12] = f103 + f248;
1537 y[18] = f101 - f256;
1538 y[13] = f101 + f256;
1539 y[17] = f99 - f252;
1540 y[14] = f99 + f252;
1541 y[16] = f97 - f244;
1542 y[15] = f97 + f244;
1543 }
1544
/*
 * DCT2_32_unscaled: fully unrolled 32-point transform.
 *
 * x: input, elements x[0]..x[31] are all read (only while computing
 *    f0..f31, i.e. before anything is stored to y).
 * y: output, elements y[0]..y[31] are all written.
 *
 * Going by the name this is a type-II DCT without output scaling; the exact
 * normalisation cannot be confirmed from this function alone -- check the
 * caller.
 *
 * The temporaries are numbered with gaps (f61, f62, f67, ...): at those
 * positions the result is stored straight into y instead of a local.
 *
 * Constants with magnitude below 1 use FRAC_CONST with MUL_F, constants
 * above 1 use COEF_CONST with MUL_C.  The recurring pattern
 *     s = a + b;  c1*a + c2*s;  c3*b - c2*s
 * (and its variant with s = a - b) is a two-input linear combination done
 * with three multiplies.
 */
1545 void DCT2_32_unscaled(real_t *y, real_t *x)
1546 {
1547 real_t f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10;
1548 real_t f11, f12, f13, f14, f15, f16, f17, f18, f19, f20;
1549 real_t f21, f22, f23, f24, f25, f26, f27, f28, f29, f30;
1550 real_t f31, f32, f33, f34, f35, f36, f37, f38, f39, f40;
1551 real_t f41, f42, f43, f44, f45, f46, f47, f48, f49, f50;
1552 real_t f51, f52, f53, f54, f55, f56, f57, f58, f59, f60;
1553 real_t f63, f64, f65, f66, f69, f70, f71, f72, f73, f74;
1554 real_t f75, f76, f77, f78, f79, f80, f81, f83, f85, f86;
1555 real_t f89, f90, f91, f92, f93, f94, f95, f96, f97, f98;
1556 real_t f99, f100, f101, f102, f103, f104, f105, f106, f107, f108;
1557 real_t f109, f110, f111, f112, f113, f114, f115, f116, f117, f118;
1558 real_t f119, f120, f121, f122, f123, f124, f127, f128, f129, f130;
1559 real_t f133, f134, f135, f136, f139, f140, f141, f142, f145, f146;
1560 real_t f147, f148, f149, f150, f151, f152, f153, f154, f155, f156;
1561 real_t f157, f158, f159, f160, f161, f162, f163, f164, f165, f166;
1562 real_t f167, f168, f169, f170, f171, f172, f173, f174, f175, f176;
1563 real_t f177, f178, f179, f180, f181, f182, f183, f184, f185, f186;
1564 real_t f187, f188, f189, f190, f191, f192, f193, f194, f195, f196;
1565 real_t f197, f198, f199, f200, f201, f202, f203, f204, f205, f206;
1566 real_t f207, f208, f209, f210, f211, f212, f213, f214, f215, f216;
1567 real_t f217, f218, f219, f220, f221, f222, f223, f224, f225, f226;
1568 real_t f227, f228, f229, f230, f231, f232, f233, f234, f235, f236;
1569 real_t f237, f238, f239, f240, f241, f242, f243, f244, f247, f248;
1570 real_t f249, f250, f253, f254, f255, f256, f259, f260, f261, f262;
1571 real_t f265, f266, f267, f268, f271, f272, f273, f274, f277, f278;
1572 real_t f279, f280, f283, f284, f285, f286;
1573
/*
 * Fold the input around its centre: for k = 0..15 the even-numbered
 * temporary holds x[k] - x[31-k] and the odd-numbered one x[k] + x[31-k].
 */
1574 f0 = x[0] - x[31];
1575 f1 = x[0] + x[31];
1576 f2 = x[1] - x[30];
1577 f3 = x[1] + x[30];
1578 f4 = x[2] - x[29];
1579 f5 = x[2] + x[29];
1580 f6 = x[3] - x[28];
1581 f7 = x[3] + x[28];
1582 f8 = x[4] - x[27];
1583 f9 = x[4] + x[27];
1584 f10 = x[5] - x[26];
1585 f11 = x[5] + x[26];
1586 f12 = x[6] - x[25];
1587 f13 = x[6] + x[25];
1588 f14 = x[7] - x[24];
1589 f15 = x[7] + x[24];
1590 f16 = x[8] - x[23];
1591 f17 = x[8] + x[23];
1592 f18 = x[9] - x[22];
1593 f19 = x[9] + x[22];
1594 f20 = x[10] - x[21];
1595 f21 = x[10] + x[21];
1596 f22 = x[11] - x[20];
1597 f23 = x[11] + x[20];
1598 f24 = x[12] - x[19];
1599 f25 = x[12] + x[19];
1600 f26 = x[13] - x[18];
1601 f27 = x[13] + x[18];
1602 f28 = x[14] - x[17];
1603 f29 = x[14] + x[17];
1604 f30 = x[15] - x[16];
1605 f31 = x[15] + x[16];
/* Fold the sums (f1, f3, ..., f31) again, and then the sums of those. */
1606 f32 = f1 - f31;
1607 f33 = f1 + f31;
1608 f34 = f3 - f29;
1609 f35 = f3 + f29;
1610 f36 = f5 - f27;
1611 f37 = f5 + f27;
1612 f38 = f7 - f25;
1613 f39 = f7 + f25;
1614 f40 = f9 - f23;
1615 f41 = f9 + f23;
1616 f42 = f11 - f21;
1617 f43 = f11 + f21;
1618 f44 = f13 - f19;
1619 f45 = f13 + f19;
1620 f46 = f15 - f17;
1621 f47 = f15 + f17;
1622 f48 = f33 - f47;
1623 f49 = f33 + f47;
1624 f50 = f35 - f45;
1625 f51 = f35 + f45;
1626 f52 = f37 - f43;
1627 f53 = f37 + f43;
1628 f54 = f39 - f41;
1629 f55 = f39 + f41;
1630 f56 = f49 - f55;
1631 f57 = f49 + f55;
1632 f58 = f51 - f53;
1633 f59 = f51 + f53;
/* y[0] is the plain sum of the fully folded values; y[16] their scaled difference. */
1634 f60 = f57 - f59;
1635 y[0] = f57 + f59;
1636 y[16] = MUL_F(FRAC_CONST(0.7071067811865476), f60);
/* y[8] and y[24] from f56, f58. */
1637 f63 = f56 + f58;
1638 f64 = MUL_C(COEF_CONST(1.3065629648763766), f56);
1639 f65 = MUL_F(FRAC_CONST(-0.9238795325112866), f63);
1640 f66 = MUL_F(FRAC_CONST(-0.5411961001461967), f58);
1641 y[24] = f64 + f65;
1642 y[8] = f66 - f65;
/* y[4], y[12], y[20], y[28] from f48, f50, f52, f54. */
1643 f69 = f48 + f54;
1644 f70 = MUL_C(COEF_CONST(1.1758756024193588), f48);
1645 f71 = MUL_F(FRAC_CONST(-0.9807852804032304), f69);
1646 f72 = MUL_F(FRAC_CONST(-0.7856949583871021), f54);
1647 f73 = f70 + f71;
1648 f74 = f72 - f71;
1649 f75 = f50 + f52;
1650 f76 = MUL_C(COEF_CONST(1.3870398453221473), f50);
1651 f77 = MUL_F(FRAC_CONST(-0.8314696123025455), f75);
1652 f78 = MUL_F(FRAC_CONST(-0.2758993792829436), f52);
1653 f79 = f76 + f77;
1654 f80 = f78 - f77;
1655 f81 = f74 - f80;
1656 y[4] = f74 + f80;
1657 f83 = MUL_F(FRAC_CONST(0.7071067811865476), f81);
1658 y[28] = f73 - f79;
1659 f85 = f73 + f79;
1660 f86 = MUL_F(FRAC_CONST(0.7071067811865476), f85);
1661 y[20] = f83 - f86;
1662 y[12] = f83 + f86;
/*
 * From the differences f32, f34, ..., f46 of the second fold: outputs
 * y[2], y[6], y[10], y[14], y[18], y[22], y[26], y[30].
 */
1663 f89 = f34 - f36;
1664 f90 = f34 + f36;
1665 f91 = f38 - f40;
1666 f92 = f38 + f40;
1667 f93 = f42 - f44;
1668 f94 = f42 + f44;
1669 f95 = MUL_F(FRAC_CONST(0.7071067811865476), f92);
1670 f96 = f32 - f95;
1671 f97 = f32 + f95;
1672 f98 = f90 + f94;
1673 f99 = MUL_C(COEF_CONST(1.3065629648763766), f90);
1674 f100 = MUL_F(FRAC_CONST(-0.9238795325112866), f98);
1675 f101 = MUL_F(FRAC_CONST(-0.5411961001461967), f94);
1676 f102 = f99 + f100;
1677 f103 = f101 - f100;
1678 f104 = f97 - f103;
1679 f105 = f97 + f103;
1680 f106 = f96 - f102;
1681 f107 = f96 + f102;
1682 f108 = MUL_F(FRAC_CONST(0.7071067811865476), f91);
1683 f109 = f46 - f108;
1684 f110 = f46 + f108;
1685 f111 = f93 + f89;
1686 f112 = MUL_C(COEF_CONST(1.3065629648763766), f93);
1687 f113 = MUL_F(FRAC_CONST(-0.9238795325112866), f111);
1688 f114 = MUL_F(FRAC_CONST(-0.5411961001461967), f89);
1689 f115 = f112 + f113;
1690 f116 = f114 - f113;
1691 f117 = f110 - f116;
1692 f118 = f110 + f116;
1693 f119 = f109 - f115;
1694 f120 = f109 + f115;
/* Final three-multiply step per output pair (y[k], y[32-k]). */
1695 f121 = f118 + f105;
1696 f122 = MUL_F(FRAC_CONST(-0.8971675863426361), f118);
1697 f123 = MUL_F(FRAC_CONST(0.9951847266721968), f121);
1698 f124 = MUL_C(COEF_CONST(1.0932018670017576), f105);
1699 y[2] = f122 + f123;
1700 y[30] = f124 - f123;
1701 f127 = f107 - f120;
1702 f128 = MUL_F(FRAC_CONST(-0.6666556584777466), f120);
1703 f129 = MUL_F(FRAC_CONST(0.9569403357322089), f127);
1704 f130 = MUL_C(COEF_CONST(1.2472250129866713), f107);
1705 y[6] = f129 - f128;
1706 y[26] = f130 - f129;
1707 f133 = f119 + f106;
1708 f134 = MUL_F(FRAC_CONST(-0.4105245275223571), f119);
1709 f135 = MUL_F(FRAC_CONST(0.8819212643483549), f133);
1710 f136 = MUL_C(COEF_CONST(1.3533180011743529), f106);
1711 y[10] = f134 + f135;
1712 y[22] = f136 - f135;
1713 f139 = f104 - f117;
1714 f140 = MUL_F(FRAC_CONST(-0.1386171691990915), f117);
1715 f141 = MUL_F(FRAC_CONST(0.7730104533627370), f139);
1716 f142 = MUL_C(COEF_CONST(1.4074037375263826), f104);
1717 y[14] = f141 - f140;
1718 y[18] = f142 - f141;
/*
 * From here on only the first-fold differences f0, f2, ..., f30 are used;
 * they produce all odd-indexed outputs y[1], y[3], ..., y[31].
 */
1719 f145 = f2 - f4;
1720 f146 = f2 + f4;
1721 f147 = f6 - f8;
1722 f148 = f6 + f8;
1723 f149 = f10 - f12;
1724 f150 = f10 + f12;
1725 f151 = f14 - f16;
1726 f152 = f14 + f16;
1727 f153 = f18 - f20;
1728 f154 = f18 + f20;
1729 f155 = f22 - f24;
1730 f156 = f22 + f24;
1731 f157 = f26 - f28;
1732 f158 = f26 + f28;
/* First half of the odd branch: built on f0 and the pairwise sums. */
1733 f159 = MUL_F(FRAC_CONST(0.7071067811865476), f152);
1734 f160 = f0 - f159;
1735 f161 = f0 + f159;
1736 f162 = f148 + f156;
1737 f163 = MUL_C(COEF_CONST(1.3065629648763766), f148);
1738 f164 = MUL_F(FRAC_CONST(-0.9238795325112866), f162);
1739 f165 = MUL_F(FRAC_CONST(-0.5411961001461967), f156);
1740 f166 = f163 + f164;
1741 f167 = f165 - f164;
1742 f168 = f161 - f167;
1743 f169 = f161 + f167;
1744 f170 = f160 - f166;
1745 f171 = f160 + f166;
1746 f172 = f146 + f158;
1747 f173 = MUL_C(COEF_CONST(1.1758756024193588), f146);
1748 f174 = MUL_F(FRAC_CONST(-0.9807852804032304), f172);
1749 f175 = MUL_F(FRAC_CONST(-0.7856949583871021), f158);
1750 f176 = f173 + f174;
1751 f177 = f175 - f174;
1752 f178 = f150 + f154;
1753 f179 = MUL_C(COEF_CONST(1.3870398453221473), f150);
1754 f180 = MUL_F(FRAC_CONST(-0.8314696123025455), f178);
1755 f181 = MUL_F(FRAC_CONST(-0.2758993792829436), f154);
1756 f182 = f179 + f180;
1757 f183 = f181 - f180;
1758 f184 = f177 - f183;
1759 f185 = f177 + f183;
1760 f186 = MUL_F(FRAC_CONST(0.7071067811865476), f184);
1761 f187 = f176 - f182;
1762 f188 = f176 + f182;
1763 f189 = MUL_F(FRAC_CONST(0.7071067811865476), f188);
1764 f190 = f186 - f189;
1765 f191 = f186 + f189;
1766 f192 = f169 - f185;
1767 f193 = f169 + f185;
1768 f194 = f171 - f191;
1769 f195 = f171 + f191;
1770 f196 = f170 - f190;
1771 f197 = f170 + f190;
1772 f198 = f168 - f187;
1773 f199 = f168 + f187;
/* Second half of the odd branch: built on f30 and the pairwise differences. */
1774 f200 = MUL_F(FRAC_CONST(0.7071067811865476), f151);
1775 f201 = f30 - f200;
1776 f202 = f30 + f200;
1777 f203 = f155 + f147;
1778 f204 = MUL_C(COEF_CONST(1.3065629648763766), f155);
1779 f205 = MUL_F(FRAC_CONST(-0.9238795325112866), f203);
1780 f206 = MUL_F(FRAC_CONST(-0.5411961001461967), f147);
1781 f207 = f204 + f205;
1782 f208 = f206 - f205;
1783 f209 = f202 - f208;
1784 f210 = f202 + f208;
1785 f211 = f201 - f207;
1786 f212 = f201 + f207;
1787 f213 = f157 + f145;
1788 f214 = MUL_C(COEF_CONST(1.1758756024193588), f157);
1789 f215 = MUL_F(FRAC_CONST(-0.9807852804032304), f213);
1790 f216 = MUL_F(FRAC_CONST(-0.7856949583871021), f145);
1791 f217 = f214 + f215;
1792 f218 = f216 - f215;
1793 f219 = f153 + f149;
1794 f220 = MUL_C(COEF_CONST(1.3870398453221473), f153);
1795 f221 = MUL_F(FRAC_CONST(-0.8314696123025455), f219);
1796 f222 = MUL_F(FRAC_CONST(-0.2758993792829436), f149);
1797 f223 = f220 + f221;
1798 f224 = f222 - f221;
1799 f225 = f218 - f224;
1800 f226 = f218 + f224;
1801 f227 = MUL_F(FRAC_CONST(0.7071067811865476), f225);
1802 f228 = f217 - f223;
1803 f229 = f217 + f223;
1804 f230 = MUL_F(FRAC_CONST(0.7071067811865476), f229);
1805 f231 = f227 - f230;
1806 f232 = f227 + f230;
1807 f233 = f210 - f226;
1808 f234 = f210 + f226;
1809 f235 = f212 - f232;
1810 f236 = f212 + f232;
1811 f237 = f211 - f231;
1812 f238 = f211 + f231;
1813 f239 = f209 - f228;
1814 f240 = f209 + f228;
/*
 * Final three-multiply step per odd output pair (y[k], y[32-k]): one
 * operand comes from each half of the odd branch.
 */
1815 f241 = f234 + f193;
1816 f242 = MUL_F(FRAC_CONST(-0.9497277818777543), f234);
1817 f243 = MUL_F(FRAC_CONST(0.9987954562051724), f241);
1818 f244 = MUL_C(COEF_CONST(1.0478631305325905), f193);
1819 y[1] = f242 + f243;
1820 y[31] = f244 - f243;
1821 f247 = f195 - f236;
1822 f248 = MUL_F(FRAC_CONST(-0.8424460355094192), f236);
1823 f249 = MUL_F(FRAC_CONST(0.9891765099647810), f247);
1824 f250 = MUL_C(COEF_CONST(1.1359069844201428), f195);
1825 y[3] = f249 - f248;
1826 y[29] = f250 - f249;
1827 f253 = f238 + f197;
1828 f254 = MUL_F(FRAC_CONST(-0.7270510732912801), f238);
1829 f255 = MUL_F(FRAC_CONST(0.9700312531945440), f253);
1830 f256 = MUL_C(COEF_CONST(1.2130114330978079), f197);
1831 y[5] = f254 + f255;
1832 y[27] = f256 - f255;
1833 f259 = f199 - f240;
1834 f260 = MUL_F(FRAC_CONST(-0.6046542117908007), f240);
1835 f261 = MUL_F(FRAC_CONST(0.9415440651830208), f259);
1836 f262 = MUL_C(COEF_CONST(1.2784339185752409), f199);
1837 y[7] = f261 - f260;
1838 y[25] = f262 - f261;
1839 f265 = f239 + f198;
1840 f266 = MUL_F(FRAC_CONST(-0.4764341996931611), f239);
1841 f267 = MUL_F(FRAC_CONST(0.9039892931234433), f265);
1842 f268 = MUL_C(COEF_CONST(1.3315443865537255), f198);
1843 y[9] = f266 + f267;
1844 y[23] = f268 - f267;
1845 f271 = f196 - f237;
1846 f272 = MUL_F(FRAC_CONST(-0.3436258658070505), f237);
1847 f273 = MUL_F(FRAC_CONST(0.8577286100002721), f271);
1848 f274 = MUL_C(COEF_CONST(1.3718313541934939), f196);
1849 y[11] = f273 - f272;
1850 y[21] = f274 - f273;
1851 f277 = f235 + f194;
1852 f278 = MUL_F(FRAC_CONST(-0.2075082269882114), f235);
1853 f279 = MUL_F(FRAC_CONST(0.8032075314806448), f277);
1854 f280 = MUL_C(COEF_CONST(1.3989068359730783), f194);
1855 y[13] = f278 + f279;
1856 y[19] = f280 - f279;
1857 f283 = f192 - f233;
1858 f284 = MUL_F(FRAC_CONST(-0.0693921705079408), f233);
1859 f285 = MUL_F(FRAC_CONST(0.7409511253549591), f283);
1860 f286 = MUL_C(COEF_CONST(1.4125100802019774), f192);
1861 y[15] = f285 - f284;
1862 y[17] = f286 - f285;
1863 }
1864
1865 #else
1866
1867
/* FFT length; used as the loop bound in fft_dif and #undef'd right after it. */
1868 #define n 32
/* Base-2 logarithm of the FFT length; nothing between here and its #undef references it. */
1869 #define log2n 5
1870
1871 // w_array_real[i] = cos(2*M_PI*i/32)
1872 static const real_t w_array_real[] = {
1873 FRAC_CONST(1.000000000000000), FRAC_CONST(0.980785279337272),
1874 FRAC_CONST(0.923879528329380), FRAC_CONST(0.831469603195765),
1875 FRAC_CONST(0.707106765732237), FRAC_CONST(0.555570210304169),
1876 FRAC_CONST(0.382683402077046), FRAC_CONST(0.195090284503576),
1877 FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090370246552),
1878 FRAC_CONST(-0.382683482845162), FRAC_CONST(-0.555570282993553),
1879 FRAC_CONST(-0.707106827549476), FRAC_CONST(-0.831469651765257),
1880 FRAC_CONST(-0.923879561784627), FRAC_CONST(-0.980785296392607)
1881 };
1882
1883 // w_array_imag[i] = sin(-2*M_PI*i/32)
1884 static const real_t w_array_imag[] = {
1885 FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090327375064),
1886 FRAC_CONST(-0.382683442461104), FRAC_CONST(-0.555570246648862),
1887 FRAC_CONST(-0.707106796640858), FRAC_CONST(-0.831469627480512),
1888 FRAC_CONST(-0.923879545057005), FRAC_CONST(-0.980785287864940),
1889 FRAC_CONST(-1.000000000000000), FRAC_CONST(-0.980785270809601),
1890 FRAC_CONST(-0.923879511601754), FRAC_CONST(-0.831469578911016),
1891 FRAC_CONST(-0.707106734823616), FRAC_CONST(-0.555570173959476),
1892 FRAC_CONST(-0.382683361692986), FRAC_CONST(-0.195090241632088)
1893 };
1894
1895 // FFT decimation in frequency
1896 // 4*16*2+16=128+16=144 multiplications
1897 // 6*16*2+10*8+4*16*2=192+80+128=400 additions
1898 static void fft_dif(real_t * Real, real_t * Imag)
1899 {
1900 real_t w_real, w_imag; // For faster access
1901 real_t point1_real, point1_imag, point2_real, point2_imag; // For faster access
1902 uint32_t j, i, i2, w_index; // Counters
1903
1904 // First 2 stages of 32 point FFT decimation in frequency
1905 // 4*16*2=64*2=128 multiplications
1906 // 6*16*2=96*2=192 additions
1907 // Stage 1 of 32 point FFT decimation in frequency
1908 for (i = 0; i < 16; i++)
1909 {
1910 point1_real = Real[i];
1911 point1_imag = Imag[i];
1912 i2 = i+16;
1913 point2_real = Real[i2];
1914 point2_imag = Imag[i2];
1915
1916 w_real = w_array_real[i];
1917 w_imag = w_array_imag[i];
1918
1919 // temp1 = x[i] - x[i2]
1920 point1_real -= point2_real;
1921 point1_imag -= point2_imag;
1922
1923 // x[i1] = x[i] + x[i2]
1924 Real[i] += point2_real;
1925 Imag[i] += point2_imag;
1926
1927 // x[i2] = (x[i] - x[i2]) * w
1928 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1929 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1930 }
1931 // Stage 2 of 32 point FFT decimation in frequency
1932 for (j = 0, w_index = 0; j < 8; j++, w_index += 2)
1933 {
1934 w_real = w_array_real[w_index];
1935 w_imag = w_array_imag[w_index];
1936
1937 i = j;
1938 point1_real = Real[i];
1939 point1_imag = Imag[i];
1940 i2 = i+8;
1941 point2_real = Real[i2];
1942 point2_imag = Imag[i2];
1943
1944 // temp1 = x[i] - x[i2]
1945 point1_real -= point2_real;
1946 point1_imag -= point2_imag;
1947
1948 // x[i1] = x[i] + x[i2]
1949 Real[i] += point2_real;
1950 Imag[i] += point2_imag;
1951
1952 // x[i2] = (x[i] - x[i2]) * w
1953 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1954 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1955
1956 i = j+16;
1957 point1_real = Real[i];
1958 point1_imag = Imag[i];
1959 i2 = i+8;
1960 point2_real = Real[i2];
1961 point2_imag = Imag[i2];
1962
1963 // temp1 = x[i] - x[i2]
1964 point1_real -= point2_real;
1965 point1_imag -= point2_imag;
1966
1967 // x[i1] = x[i] + x[i2]
1968 Real[i] += point2_real;
1969 Imag[i] += point2_imag;
1970
1971 // x[i2] = (x[i] - x[i2]) * w
1972 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1973 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1974 }
1975
1976 // Stage 3 of 32 point FFT decimation in frequency
1977 // 2*4*2=16 multiplications
1978 // 4*4*2+6*4*2=10*8=80 additions
1979 for (i = 0; i < n; i += 8)
1980 {
1981 i2 = i+4;
1982 point1_real = Real[i];
1983 point1_imag = Imag[i];
1984
1985 point2_real = Real[i2];
1986 point2_imag = Imag[i2];
1987
1988 // out[i1] = point1 + point2
1989 Real[i] += point2_real;
1990 Imag[i] += point2_imag;
1991
1992 // out[i2] = point1 - point2
1993 Real[i2] = point1_real - point2_real;
1994 Imag[i2] = point1_imag - point2_imag;
1995 }
1996 w_real = w_array_real[4]; // = sqrt(2)/2
1997 // w_imag = -w_real; // = w_array_imag[4]; // = -sqrt(2)/2
1998 for (i = 1; i < n; i += 8)
1999 {
2000 i2 = i+4;
2001 point1_real = Real[i];
2002 point1_imag = Imag[i];
2003
2004 point2_real = Real[i2];
2005 point2_imag = Imag[i2];
2006
2007 // temp1 = x[i] - x[i2]
2008 point1_real -= point2_real;
2009 point1_imag -= point2_imag;
2010
2011 // x[i1] = x[i] + x[i2]
2012 Real[i] += point2_real;
2013 Imag[i] += point2_imag;
2014
2015 // x[i2] = (x[i] - x[i2]) * w
2016 Real[i2] = MUL_F(point1_real+point1_imag, w_real);
2017 Imag[i2] = MUL_F(point1_imag-point1_real, w_real);
2018 }
2019 for (i = 2; i < n; i += 8)
2020 {
2021 i2 = i+4;
2022 point1_real = Real[i];
2023 point1_imag = Imag[i];
2024
2025 point2_real = Real[i2];
2026 point2_imag = Imag[i2];
2027
2028 // x[i] = x[i] + x[i2]
2029 Real[i] += point2_real;
2030 Imag[i] += point2_imag;
2031
2032 // x[i2] = (x[i] - x[i2]) * (-i)
2033 Real[i2] = point1_imag - point2_imag;
2034 Imag[i2] = point2_real - point1_real;
2035 }
2036 w_real = w_array_real[12]; // = -sqrt(2)/2
2037 // w_imag = w_real; // = w_array_imag[12]; // = -sqrt(2)/2
2038 for (i = 3; i < n; i += 8)
2039 {
2040 i2 = i+4;
2041 point1_real = Real[i];
2042 point1_imag = Imag[i];
2043
2044 point2_real = Real[i2];
2045 point2_imag = Imag[i2];
2046
2047 // temp1 = x[i] - x[i2]
2048 point1_real -= point2_real;
2049 point1_imag -= point2_imag;
2050
2051 // x[i1] = x[i] + x[i2]
2052 Real[i] += point2_real;
2053 Imag[i] += point2_imag;
2054
2055 // x[i2] = (x[i] - x[i2]) * w
2056 Real[i2] = MUL_F(point1_real-point1_imag, w_real);
2057 Imag[i2] = MUL_F(point1_real+point1_imag, w_real);
2058 }
2059
2060
2061 // Stage 4 of 32 point FFT decimation in frequency (no multiplications)
2062 // 16*4=64 additions
2063 for (i = 0; i < n; i += 4)
2064 {
2065 i2 = i+2;
2066 point1_real = Real[i];
2067 point1_imag = Imag[i];
2068
2069 point2_real = Real[i2];
2070 point2_imag = Imag[i2];
2071
2072 // x[i1] = x[i] + x[i2]
2073 Real[i] += point2_real;
2074 Imag[i] += point2_imag;
2075
2076 // x[i2] = x[i] - x[i2]
2077 Real[i2] = point1_real - point2_real;
2078 Imag[i2] = point1_imag - point2_imag;
2079 }
2080 for (i = 1; i < n; i += 4)
2081 {
2082 i2 = i+2;
2083 point1_real = Real[i];
2084 point1_imag = Imag[i];
2085
2086 point2_real = Real[i2];
2087 point2_imag = Imag[i2];
2088
2089 // x[i] = x[i] + x[i2]
2090 Real[i] += point2_real;
2091 Imag[i] += point2_imag;
2092
2093 // x[i2] = (x[i] - x[i2]) * (-i)
2094 Real[i2] = point1_imag - point2_imag;
2095 Imag[i2] = point2_real - point1_real;
2096 }
2097
2098 // Stage 5 of 32 point FFT decimation in frequency (no multiplications)
2099 // 16*4=64 additions
2100 for (i = 0; i < n; i += 2)
2101 {
2102 i2 = i+1;
2103 point1_real = Real[i];
2104 point1_imag = Imag[i];
2105
2106 point2_real = Real[i2];
2107 point2_imag = Imag[i2];
2108
2109 // out[i1] = point1 + point2
2110 Real[i] += point2_real;
2111 Imag[i] += point2_imag;
2112
2113 // out[i2] = point1 - point2
2114 Real[i2] = point1_real - point2_real;
2115 Imag[i2] = point1_imag - point2_imag;
2116 }
2117
2118 #ifdef REORDER_IN_FFT
2119 FFTReorder(Real, Imag);
2120 #endif // #ifdef REORDER_IN_FFT
2121 }
2122 #undef n
2123 #undef log2n
2124
/*
 * Coefficient table as indexed by dct4_kernel(): six consecutive banks of
 * 32 entries each (192 entries in total), addressed as dct4_64_tab[i + k*32]
 * with i in 0..31.
 *
 *   bank 0 (  0.. 31): step 2, factor applied to (re + im)
 *   bank 1 ( 32.. 63): step 2, factor applied to re for the imaginary result
 *   bank 2 ( 64.. 95): step 2, factor applied to im for the real result
 *   bank 3 ( 96..127): step 4, factor applied to (re + im)
 *   bank 4 (128..159): step 4, factor applied to re for the imaginary result
 *   bank 5 (160..191): step 4, factor applied to im for the real result
 *
 * dct4_kernel() computes output 16 of step 4 separately and uses only entry
 * 16 + 3*32 for it, so entries 16 + 4*32 and 16 + 5*32 (both COEF_CONST(0))
 * are never read by that function.
 */
2125 static const real_t dct4_64_tab[] = {
/* bank 0: entries 0..31 */
2126 COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507),
2127 COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537),
2128 COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708),
2129 COEF_CONST(0.953306019306183), COEF_CONST(0.937339007854462),
2130 COEF_CONST(0.919113874435425), COEF_CONST(0.898674488067627),
2131 COEF_CONST(0.876070082187653), COEF_CONST(0.851355195045471),
2132 COEF_CONST(0.824589252471924), COEF_CONST(0.795836925506592),
2133 COEF_CONST(0.765167236328125), COEF_CONST(0.732654273509979),
2134 COEF_CONST(0.698376238346100), COEF_CONST(0.662415742874146),
2135 COEF_CONST(0.624859452247620), COEF_CONST(0.585797846317291),
2136 COEF_CONST(0.545324981212616), COEF_CONST(0.503538429737091),
2137 COEF_CONST(0.460538715124130), COEF_CONST(0.416429549455643),
2138 COEF_CONST(0.371317148208618), COEF_CONST(0.325310230255127),
2139 COEF_CONST(0.278519600629807), COEF_CONST(0.231058135628700),
2140 COEF_CONST(0.183039888739586), COEF_CONST(0.134580686688423),
2141 COEF_CONST(0.085797272622585), COEF_CONST(0.036807164549828),
/* bank 1: entries 32..63 */
2142 COEF_CONST(-1.012196302413940), COEF_CONST(-1.059438824653626),
2143 COEF_CONST(-1.104129195213318), COEF_CONST(-1.146159529685974),
2144 COEF_CONST(-1.185428738594055), COEF_CONST(-1.221842169761658),
2145 COEF_CONST(-1.255311965942383), COEF_CONST(-1.285757660865784),
2146 COEF_CONST(-1.313105940818787), COEF_CONST(-1.337290763854981),
2147 COEF_CONST(-1.358253836631775), COEF_CONST(-1.375944852828980),
2148 COEF_CONST(-1.390321016311646), COEF_CONST(-1.401347875595093),
2149 COEF_CONST(-1.408998727798462), COEF_CONST(-1.413255214691162),
2150 COEF_CONST(-1.414107084274292), COEF_CONST(-1.411552190780640),
2151 COEF_CONST(-1.405596733093262), COEF_CONST(-1.396255016326904),
2152 COEF_CONST(-1.383549690246582), COEF_CONST(-1.367511272430420),
2153 COEF_CONST(-1.348178386688232), COEF_CONST(-1.325597524642944),
2154 COEF_CONST(-1.299823284149170), COEF_CONST(-1.270917654037476),
2155 COEF_CONST(-1.238950133323669), COEF_CONST(-1.203998088836670),
2156 COEF_CONST(-1.166145324707031), COEF_CONST(-1.125483393669128),
2157 COEF_CONST(-1.082109928131104), COEF_CONST(-1.036129593849182),
/* bank 2: entries 64..95 */
2158 COEF_CONST(-0.987653195858002), COEF_CONST(-0.936797380447388),
2159 COEF_CONST(-0.883684754371643), COEF_CONST(-0.828443288803101),
2160 COEF_CONST(-0.771206021308899), COEF_CONST(-0.712110757827759),
2161 COEF_CONST(-0.651300072669983), COEF_CONST(-0.588920354843140),
2162 COEF_CONST(-0.525121808052063), COEF_CONST(-0.460058242082596),
2163 COEF_CONST(-0.393886327743530), COEF_CONST(-0.326765477657318),
2164 COEF_CONST(-0.258857429027557), COEF_CONST(-0.190325915813446),
2165 COEF_CONST(-0.121335685253143), COEF_CONST(-0.052053272724152),
2166 COEF_CONST(0.017354607582092), COEF_CONST(0.086720645427704),
2167 COEF_CONST(0.155877828598022), COEF_CONST(0.224659323692322),
2168 COEF_CONST(0.292899727821350), COEF_CONST(0.360434412956238),
2169 COEF_CONST(0.427100926637650), COEF_CONST(0.492738455533981),
2170 COEF_CONST(0.557188928127289), COEF_CONST(0.620297133922577),
2171 COEF_CONST(0.681910991668701), COEF_CONST(0.741881847381592),
2172 COEF_CONST(0.800065577030182), COEF_CONST(0.856321990489960),
2173 COEF_CONST(0.910515367984772), COEF_CONST(0.962515234947205),
/* bank 3: entries 96..127 */
2174 COEF_CONST(1.000000000000000), COEF_CONST(0.998795449733734),
2175 COEF_CONST(0.995184719562531), COEF_CONST(0.989176511764526),
2176 COEF_CONST(0.980785250663757), COEF_CONST(0.970031261444092),
2177 COEF_CONST(0.956940352916718), COEF_CONST(0.941544055938721),
2178 COEF_CONST(0.923879504203796), COEF_CONST(0.903989315032959),
2179 COEF_CONST(0.881921231746674), COEF_CONST(0.857728600502014),
2180 COEF_CONST(0.831469595432281), COEF_CONST(0.803207516670227),
2181 COEF_CONST(0.773010432720184), COEF_CONST(0.740951120853424),
2182 COEF_CONST(0.707106769084930), COEF_CONST(0.671558916568756),
2183 COEF_CONST(0.634393274784088), COEF_CONST(0.595699310302734),
2184 COEF_CONST(0.555570185184479), COEF_CONST(0.514102697372437),
2185 COEF_CONST(0.471396654844284), COEF_CONST(0.427555114030838),
2186 COEF_CONST(0.382683426141739), COEF_CONST(0.336889833211899),
2187 COEF_CONST(0.290284633636475), COEF_CONST(0.242980122566223),
2188 COEF_CONST(0.195090234279633), COEF_CONST(0.146730497479439),
2189 COEF_CONST(0.098017133772373), COEF_CONST(0.049067649990320),
/* bank 4: entries 128..159 (entry 144 is the unused COEF_CONST(0)) */
2190 COEF_CONST(-1.000000000000000), COEF_CONST(-1.047863125801086),
2191 COEF_CONST(-1.093201875686646), COEF_CONST(-1.135906934738159),
2192 COEF_CONST(-1.175875544548035), COEF_CONST(-1.213011503219605),
2193 COEF_CONST(-1.247225046157837), COEF_CONST(-1.278433918952942),
2194 COEF_CONST(-1.306562900543213), COEF_CONST(-1.331544399261475),
2195 COEF_CONST(-1.353317975997925), COEF_CONST(-1.371831417083740),
2196 COEF_CONST(-1.387039899826050), COEF_CONST(-1.398906826972961),
2197 COEF_CONST(-1.407403707504273), COEF_CONST(-1.412510156631470),
2198 COEF_CONST(0), COEF_CONST(-1.412510156631470),
2199 COEF_CONST(-1.407403707504273), COEF_CONST(-1.398906826972961),
2200 COEF_CONST(-1.387039899826050), COEF_CONST(-1.371831417083740),
2201 COEF_CONST(-1.353317975997925), COEF_CONST(-1.331544399261475),
2202 COEF_CONST(-1.306562900543213), COEF_CONST(-1.278433918952942),
2203 COEF_CONST(-1.247225046157837), COEF_CONST(-1.213011384010315),
2204 COEF_CONST(-1.175875544548035), COEF_CONST(-1.135907053947449),
2205 COEF_CONST(-1.093201875686646), COEF_CONST(-1.047863125801086),
/* bank 5: entries 160..191 (entry 176 is the unused COEF_CONST(0)) */
2206 COEF_CONST(-1.000000000000000), COEF_CONST(-0.949727773666382),
2207 COEF_CONST(-0.897167563438416), COEF_CONST(-0.842446029186249),
2208 COEF_CONST(-0.785694956779480), COEF_CONST(-0.727051079273224),
2209 COEF_CONST(-0.666655659675598), COEF_CONST(-0.604654192924500),
2210 COEF_CONST(-0.541196048259735), COEF_CONST(-0.476434230804443),
2211 COEF_CONST(-0.410524487495422), COEF_CONST(-0.343625843524933),
2212 COEF_CONST(-0.275899350643158), COEF_CONST(-0.207508206367493),
2213 COEF_CONST(-0.138617098331451), COEF_CONST(-0.069392144680023),
2214 COEF_CONST(0), COEF_CONST(0.069392263889313),
2215 COEF_CONST(0.138617157936096), COEF_CONST(0.207508206367493),
2216 COEF_CONST(0.275899469852448), COEF_CONST(0.343625962734222),
2217 COEF_CONST(0.410524636507034), COEF_CONST(0.476434201002121),
2218 COEF_CONST(0.541196107864380), COEF_CONST(0.604654192924500),
2219 COEF_CONST(0.666655719280243), COEF_CONST(0.727051138877869),
2220 COEF_CONST(0.785695075988770), COEF_CONST(0.842446029186249),
2221 COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382)
2222 };
2223
2224 /* size 64 only! */
2225 void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag)
2226 {
2227 // Tables with bit reverse values for 5 bits, bit reverse of i at i-th position
2228 const uint8_t bit_rev_tab[32] = { 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30,1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 };
2229 uint32_t i, i_rev;
2230
2231 /* Step 2: modulate */
2232 // 3*32=96 multiplications
2233 // 3*32=96 additions
2234 for (i = 0; i < 32; i++)
2235 {
2236 real_t x_re, x_im, tmp;
2237 x_re = in_real[i];
2238 x_im = in_imag[i];
2239 tmp = MUL_C(x_re + x_im, dct4_64_tab[i]);
2240 in_real[i] = MUL_C(x_im, dct4_64_tab[i + 64]) + tmp;
2241 in_imag[i] = MUL_C(x_re, dct4_64_tab[i + 32]) + tmp;
2242 }
2243
2244 /* Step 3: FFT, but with output in bit reverse order */
2245 fft_dif(in_real, in_imag);
2246
2247 /* Step 4: modulate + bitreverse reordering */
2248 // 3*31+2=95 multiplications
2249 // 3*31+2=95 additions
2250 for (i = 0; i < 16; i++)
2251 {
2252 real_t x_re, x_im, tmp;
2253 i_rev = bit_rev_tab[i];
2254 x_re = in_real[i_rev];
2255 x_im = in_imag[i_rev];
2256
2257 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
2258 out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;
2259 out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;
2260 }
2261 // i = 16, i_rev = 1 = rev(16);
2262 out_imag[16] = MUL_C(in_imag[1] - in_real[1], dct4_64_tab[16 + 3*32]);
2263 out_real[16] = MUL_C(in_real[1] + in_imag[1], dct4_64_tab[16 + 3*32]);
2264 for (i = 17; i < 32; i++)
2265 {
2266 real_t x_re, x_im, tmp;
2267 i_rev = bit_rev_tab[i];
2268 x_re = in_real[i_rev];
2269 x_im = in_imag[i_rev];
2270 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
2271 out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;
2272 out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;
2273 }
2274
2275 }
2276
2269 #endif 2277 #endif
2270 2278
2271 #endif 2279 #endif