comparison mpegaudiodec.c @ 2495:7a79cb42eddb libavcodec

optimizing imdct12
author michael
date Wed, 02 Feb 2005 22:38:28 +0000
parents c559ea6e395c
children 74d7fd7b49c5
comparison
equal deleted inserted replaced
2494:36d70fbb31c5 2495:7a79cb42eddb
463 463
464 /* compute mdct windows */ 464 /* compute mdct windows */
465 for(i=0;i<36;i++) { 465 for(i=0;i<36;i++) {
466 for(j=0; j<4; j++){ 466 for(j=0; j<4; j++){
467 double d; 467 double d;
468 if(j==2) continue; 468
469 if(j==2 && i%3 != 1)
470 continue;
469 471
470 d= sin(M_PI * (i + 0.5) / 36.0); 472 d= sin(M_PI * (i + 0.5) / 36.0);
471 if(j==1){ 473 if(j==1){
472 if (i>=30) d= 0; 474 if (i>=30) d= 0;
473 else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0); 475 else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
476 if (i< 6) d= 0; 478 if (i< 6) d= 0;
477 else if(i< 12) d= sin(M_PI * (i - 6 + 0.5) / 12.0); 479 else if(i< 12) d= sin(M_PI * (i - 6 + 0.5) / 12.0);
478 else if(i< 18) d= 1; 480 else if(i< 18) d= 1;
479 } 481 }
480 //merge last stage of imdct into the window coefficients 482 //merge last stage of imdct into the window coefficients
481 if (i/9 == 0) d*= 0.5 / cos(M_PI*(2*( i) +19)/72); 483 d*= 0.5 / cos(M_PI*(2*i + 19)/72);
482 else if(i/9 == 1) d*= 0.5 / cos(M_PI*(2*(17 - i) +19)/72); 484
483 else if(i/9 == 2) d*= 0.5 / cos(M_PI*(2*( i) +19)/72); 485 if(j==2)
484 else d*=-0.5 / cos(M_PI*(2*(17 - i) +19)/72); 486 mdct_win[j][i/3] = FIXHR((d / (1<<5)));
485 mdct_win[j][i] = FIXHR((d / (1<<5))); 487 else
488 mdct_win[j][i ] = FIXHR((d / (1<<5)));
486 // av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5)); 489 // av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5));
487 } 490 }
488 } 491 }
489 492
490 for(i=0;i<12;i++)
491 mdct_win[2][i] = FIXR(sin(M_PI * (i + 0.5) / 12.0));
492
493 /* NOTE: we do frequency inversion adter the MDCT by changing 493 /* NOTE: we do frequency inversion adter the MDCT by changing
494 the sign of the right window coefs */ 494 the sign of the right window coefs */
495 for(j=0;j<4;j++) { 495 for(j=0;j<4;j++) {
496 for(i=0;i<36;i+=2) { 496 for(i=0;i<36;i+=2) {
497 mdct_win[j + 4][i] = mdct_win[j][i]; 497 mdct_win[j + 4][i] = mdct_win[j][i];
929 929
930 offset = (offset - 32) & 511; 930 offset = (offset - 32) & 511;
931 *synth_buf_offset = offset; 931 *synth_buf_offset = offset;
932 } 932 }
933 933
934 /* cos(pi*i/24) */ 934 #define C3 FIXHR(0.86602540378443864676/2)
935 #define C1 FIXR(0.99144486137381041114) 935
936 #define C3 FIXR(0.92387953251128675612) 936 /* 0.5 / cos(pi*(2*i+1)/36) */
937 #define C5 FIXR(0.79335334029123516458) 937 static const int icos36[9] = {
938 #define C7 FIXR(0.60876142900872063941) 938 FIXR(0.50190991877167369479),
939 #define C9 FIXR(0.38268343236508977173) 939 FIXR(0.51763809020504152469), //0
940 #define C11 FIXR(0.13052619222005159154) 940 FIXR(0.55168895948124587824),
941 FIXR(0.61038729438072803416),
942 FIXR(0.70710678118654752439), //1
943 FIXR(0.87172339781054900991),
944 FIXR(1.18310079157624925896),
945 FIXR(1.93185165257813657349), //2
946 FIXR(5.73685662283492756461),
947 };
941 948
942 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious 949 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious
943 cases. */ 950 cases. */
944 static void imdct12(int *out, int *in) 951 static void imdct12(int *out, int *in)
945 { 952 {
946 int tmp; 953 int in0, in1, in2, in3, in4, in5, t1, t2;
947 int64_t in1_3, in1_9, in4_3, in4_9; 954 in0= in[0*3]<<5;
948 955 in1= (in[1*3] + in[0*3])<<5;
949 in1_3 = MUL64(in[1], C3); 956 in2= (in[2*3] + in[1*3])<<5;
950 in1_9 = MUL64(in[1], C9); 957 in3= (in[3*3] + in[2*3])<<5;
951 in4_3 = MUL64(in[4], C3); 958 in4= (in[4*3] + in[3*3])<<5;
952 in4_9 = MUL64(in[4], C9); 959 in5= (in[5*3] + in[4*3])<<5;
953 960 in5 += in3;
954 tmp = FRAC_RND(MUL64(in[0], C7) - in1_3 - MUL64(in[2], C11) + 961 in3 += in1;
955 MUL64(in[3], C1) - in4_9 - MUL64(in[5], C5)); 962
956 out[0] = tmp; 963 in2= MULH(2*in2, C3);
957 out[5] = -tmp; 964 in3= MULH(2*in3, C3);
958 tmp = FRAC_RND(MUL64(in[0] - in[3], C9) - in1_3 + 965
959 MUL64(in[2] + in[5], C3) - in4_9); 966 t1 = in0 - in4;
960 out[1] = tmp; 967 t2 = MULL(in1 - in5, icos36[4]);
961 out[4] = -tmp; 968
962 tmp = FRAC_RND(MUL64(in[0], C11) - in1_9 + MUL64(in[2], C7) - 969 out[ 7]=
963 MUL64(in[3], C5) + in4_3 - MUL64(in[5], C1)); 970 out[10]= t1 + t2;
964 out[2] = tmp; 971 out[ 1]=
965 out[3] = -tmp; 972 out[ 4]= t1 - t2;
966 tmp = FRAC_RND(MUL64(-in[0], C5) + in1_9 + MUL64(in[2], C1) + 973
967 MUL64(in[3], C11) - in4_3 - MUL64(in[5], C7)); 974 in0 += in4>>1;
968 out[6] = tmp; 975 in4 = in0 + in2;
969 out[11] = tmp; 976 in1 += in5>>1;
970 tmp = FRAC_RND(MUL64(-in[0] + in[3], C3) - in1_9 + 977 in5 = MULL(in1 + in3, icos36[1]);
971 MUL64(in[2] + in[5], C9) + in4_3); 978 out[ 8]=
972 out[7] = tmp; 979 out[ 9]= in4 + in5;
973 out[10] = tmp; 980 out[ 2]=
974 tmp = FRAC_RND(-MUL64(in[0], C1) - in1_3 - MUL64(in[2], C5) - 981 out[ 3]= in4 - in5;
975 MUL64(in[3], C7) - in4_9 - MUL64(in[5], C11)); 982
976 out[8] = tmp; 983 in0 -= in2;
977 out[9] = tmp; 984 in1 = MULL(in1 - in3, icos36[7]);
978 } 985 out[ 0]=
979 986 out[ 5]= in0 - in1;
980 #undef C1 987 out[ 6]=
981 #undef C3 988 out[11]= in0 + in1;
982 #undef C5 989 }
983 #undef C7
984 #undef C9
985 #undef C11
986 990
987 /* cos(pi*i/18) */ 991 /* cos(pi*i/18) */
988 #define C1 FIXHR(0.98480775301220805936/2) 992 #define C1 FIXHR(0.98480775301220805936/2)
989 #define C2 FIXHR(0.93969262078590838405/2) 993 #define C2 FIXHR(0.93969262078590838405/2)
990 #define C3 FIXHR(0.86602540378443864676/2) 994 #define C3 FIXHR(0.86602540378443864676/2)
993 #define C6 FIXHR(0.5/2) 997 #define C6 FIXHR(0.5/2)
994 #define C7 FIXHR(0.34202014332566873304/2) 998 #define C7 FIXHR(0.34202014332566873304/2)
995 #define C8 FIXHR(0.17364817766693034885/2) 999 #define C8 FIXHR(0.17364817766693034885/2)
996 1000
997 1001
998 /* 0.5 / cos(pi*(2*i+1)/36) */
999 static const int icos36[9] = {
1000 FIXR(0.50190991877167369479),
1001 FIXR(0.51763809020504152469),
1002 FIXR(0.55168895948124587824),
1003 FIXR(0.61038729438072803416),
1004 FIXR(0.70710678118654752439),
1005 FIXR(0.87172339781054900991),
1006 FIXR(1.18310079157624925896),
1007 FIXR(1.93185165257813657349),
1008 FIXR(5.73685662283492756461),
1009 };
1010 /* using Lee like decomposition followed by hand coded 9 points DCT */ 1002 /* using Lee like decomposition followed by hand coded 9 points DCT */
1011 static void imdct36(int *out, int *buf, int *in, int *win) 1003 static void imdct36(int *out, int *buf, int *in, int *win)
1012 { 1004 {
1013 int i, j, t0, t1, t2, t3, s0, s1, s2, s3; 1005 int i, j, t0, t1, t2, t3, s0, s1, s2, s3;
1014 int tmp[18], *tmp1, *in1; 1006 int tmp[18], *tmp1, *in1;
1090 s1 = MULL(t3 + t2, icos36[j]); 1082 s1 = MULL(t3 + t2, icos36[j]);
1091 s3 = MULL(t3 - t2, icos36[8 - j]); 1083 s3 = MULL(t3 - t2, icos36[8 - j]);
1092 1084
1093 t0 = (s0 + s1) << 5; 1085 t0 = (s0 + s1) << 5;
1094 t1 = (s0 - s1) << 5; 1086 t1 = (s0 - s1) << 5;
1095 out[(9 + j)*SBLIMIT] = -MULH(t1, win[9 + j]) + buf[9 + j]; 1087 out[(9 + j)*SBLIMIT] = MULH(t1, win[9 + j]) + buf[9 + j];
1096 out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j]; 1088 out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j];
1097 buf[9 + j] = MULH(t0, win[18 + 9 + j]); 1089 buf[9 + j] = MULH(t0, win[18 + 9 + j]);
1098 buf[8 - j] = MULH(t0, win[18 + 8 - j]); 1090 buf[8 - j] = MULH(t0, win[18 + 8 - j]);
1099 1091
1100 t0 = (s2 + s3) << 5; 1092 t0 = (s2 + s3) << 5;
1101 t1 = (s2 - s3) << 5; 1093 t1 = (s2 - s3) << 5;
1102 out[(9 + 8 - j)*SBLIMIT] = -MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j]; 1094 out[(9 + 8 - j)*SBLIMIT] = MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j];
1103 out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j]; 1095 out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j];
1104 buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]); 1096 buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]);
1105 buf[ + j] = MULH(t0, win[18 + j]); 1097 buf[ + j] = MULH(t0, win[18 + j]);
1106 i += 4; 1098 i += 4;
1107 } 1099 }
1108 1100
1109 s0 = tmp[16]; 1101 s0 = tmp[16];
1110 s1 = MULL(tmp[17], icos36[4]); 1102 s1 = MULL(tmp[17], icos36[4]);
1111 t0 = (s0 + s1) << 5; 1103 t0 = (s0 + s1) << 5;
1112 t1 = (s0 - s1) << 5; 1104 t1 = (s0 - s1) << 5;
1113 out[(9 + 4)*SBLIMIT] = -MULH(t1, win[9 + 4]) + buf[9 + 4]; 1105 out[(9 + 4)*SBLIMIT] = MULH(t1, win[9 + 4]) + buf[9 + 4];
1114 out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4]; 1106 out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4];
1115 buf[9 + 4] = MULH(t0, win[18 + 9 + 4]); 1107 buf[9 + 4] = MULH(t0, win[18 + 9 + 4]);
1116 buf[8 - 4] = MULH(t0, win[18 + 8 - 4]); 1108 buf[8 - 4] = MULH(t0, win[18 + 8 - 4]);
1117 } 1109 }
1118 1110
1989 static void compute_imdct(MPADecodeContext *s, 1981 static void compute_imdct(MPADecodeContext *s,
1990 GranuleDef *g, 1982 GranuleDef *g,
1991 int32_t *sb_samples, 1983 int32_t *sb_samples,
1992 int32_t *mdct_buf) 1984 int32_t *mdct_buf)
1993 { 1985 {
1994 int32_t *ptr, *win, *win1, *buf, *buf2, *out_ptr, *ptr1; 1986 int32_t *ptr, *win, *win1, *buf, *out_ptr, *ptr1;
1995 int32_t in[6];
1996 int32_t out[36];
1997 int32_t out2[12]; 1987 int32_t out2[12];
1998 int i, j, k, mdct_long_end, v, sblimit; 1988 int i, j, mdct_long_end, v, sblimit;
1999 1989
2000 /* find last non zero block */ 1990 /* find last non zero block */
2001 ptr = g->sb_hybrid + 576; 1991 ptr = g->sb_hybrid + 576;
2002 ptr1 = g->sb_hybrid + 2 * 18; 1992 ptr1 = g->sb_hybrid + 2 * 18;
2003 while (ptr >= ptr1) { 1993 while (ptr >= ptr1) {
2034 out_ptr += 18*SBLIMIT; 2024 out_ptr += 18*SBLIMIT;
2035 ptr += 18; 2025 ptr += 18;
2036 buf += 18; 2026 buf += 18;
2037 } 2027 }
2038 for(j=mdct_long_end;j<sblimit;j++) { 2028 for(j=mdct_long_end;j<sblimit;j++) {
2039 for(i=0;i<6;i++) {
2040 out[i] = 0;
2041 out[6 + i] = 0;
2042 out[30+i] = 0;
2043 }
2044 /* select frequency inversion */ 2029 /* select frequency inversion */
2045 win = mdct_win[2] + ((4 * 36) & -(j & 1)); 2030 win = mdct_win[2] + ((4 * 36) & -(j & 1));
2046 buf2 = out + 6;
2047 for(k=0;k<3;k++) {
2048 /* reorder input for short mdct */
2049 ptr1 = ptr + k;
2050 for(i=0;i<6;i++) {
2051 in[i] = *ptr1;
2052 ptr1 += 3;
2053 }
2054 imdct12(out2, in);
2055 /* apply 12 point window and do small overlap */
2056 for(i=0;i<6;i++) {
2057 buf2[i] = MULL(out2[i], win[i]) + buf2[i];
2058 buf2[i + 6] = MULL(out2[i + 6], win[i + 6]);
2059 }
2060 buf2 += 6;
2061 }
2062 /* overlap */
2063 out_ptr = sb_samples + j; 2031 out_ptr = sb_samples + j;
2064 for(i=0;i<18;i++) { 2032
2065 *out_ptr = out[i] + buf[i]; 2033 for(i=0; i<6; i++){
2066 buf[i] = out[i + 18]; 2034 *out_ptr = buf[i];
2067 out_ptr += SBLIMIT; 2035 out_ptr += SBLIMIT;
2036 }
2037 imdct12(out2, ptr + 0);
2038 for(i=0;i<6;i++) {
2039 *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1];
2040 buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]);
2041 out_ptr += SBLIMIT;
2042 }
2043 imdct12(out2, ptr + 1);
2044 for(i=0;i<6;i++) {
2045 *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2];
2046 buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]);
2047 out_ptr += SBLIMIT;
2048 }
2049 imdct12(out2, ptr + 2);
2050 for(i=0;i<6;i++) {
2051 buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0];
2052 buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]);
2053 buf[i + 6*2] = 0;
2068 } 2054 }
2069 ptr += 18; 2055 ptr += 18;
2070 buf += 18; 2056 buf += 18;
2071 } 2057 }
2072 /* zero bands */ 2058 /* zero bands */