Mercurial > libavcodec.hg
comparison mpegaudiodec.c @ 2495:7a79cb42eddb libavcodec
optimizing imdct12
author | michael |
---|---|
date | Wed, 02 Feb 2005 22:38:28 +0000 |
parents | c559ea6e395c |
children | 74d7fd7b49c5 |
comparison
equal
deleted
inserted
replaced
2494:36d70fbb31c5 | 2495:7a79cb42eddb |
---|---|
463 | 463 |
464 /* compute mdct windows */ | 464 /* compute mdct windows */ |
465 for(i=0;i<36;i++) { | 465 for(i=0;i<36;i++) { |
466 for(j=0; j<4; j++){ | 466 for(j=0; j<4; j++){ |
467 double d; | 467 double d; |
468 if(j==2) continue; | 468 |
469 if(j==2 && i%3 != 1) | |
470 continue; | |
469 | 471 |
470 d= sin(M_PI * (i + 0.5) / 36.0); | 472 d= sin(M_PI * (i + 0.5) / 36.0); |
471 if(j==1){ | 473 if(j==1){ |
472 if (i>=30) d= 0; | 474 if (i>=30) d= 0; |
473 else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0); | 475 else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0); |
476 if (i< 6) d= 0; | 478 if (i< 6) d= 0; |
477 else if(i< 12) d= sin(M_PI * (i - 6 + 0.5) / 12.0); | 479 else if(i< 12) d= sin(M_PI * (i - 6 + 0.5) / 12.0); |
478 else if(i< 18) d= 1; | 480 else if(i< 18) d= 1; |
479 } | 481 } |
480 //merge last stage of imdct into the window coefficients | 482 //merge last stage of imdct into the window coefficients |
481 if (i/9 == 0) d*= 0.5 / cos(M_PI*(2*( i) +19)/72); | 483 d*= 0.5 / cos(M_PI*(2*i + 19)/72); |
482 else if(i/9 == 1) d*= 0.5 / cos(M_PI*(2*(17 - i) +19)/72); | 484 |
483 else if(i/9 == 2) d*= 0.5 / cos(M_PI*(2*( i) +19)/72); | 485 if(j==2) |
484 else d*=-0.5 / cos(M_PI*(2*(17 - i) +19)/72); | 486 mdct_win[j][i/3] = FIXHR((d / (1<<5))); |
485 mdct_win[j][i] = FIXHR((d / (1<<5))); | 487 else |
488 mdct_win[j][i ] = FIXHR((d / (1<<5))); | |
486 // av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5)); | 489 // av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5)); |
487 } | 490 } |
488 } | 491 } |
489 | 492 |
490 for(i=0;i<12;i++) | |
491 mdct_win[2][i] = FIXR(sin(M_PI * (i + 0.5) / 12.0)); | |
492 | |
493 /* NOTE: we do frequency inversion adter the MDCT by changing | 493 /* NOTE: we do frequency inversion adter the MDCT by changing |
494 the sign of the right window coefs */ | 494 the sign of the right window coefs */ |
495 for(j=0;j<4;j++) { | 495 for(j=0;j<4;j++) { |
496 for(i=0;i<36;i+=2) { | 496 for(i=0;i<36;i+=2) { |
497 mdct_win[j + 4][i] = mdct_win[j][i]; | 497 mdct_win[j + 4][i] = mdct_win[j][i]; |
929 | 929 |
930 offset = (offset - 32) & 511; | 930 offset = (offset - 32) & 511; |
931 *synth_buf_offset = offset; | 931 *synth_buf_offset = offset; |
932 } | 932 } |
933 | 933 |
934 /* cos(pi*i/24) */ | 934 #define C3 FIXHR(0.86602540378443864676/2) |
935 #define C1 FIXR(0.99144486137381041114) | 935 |
936 #define C3 FIXR(0.92387953251128675612) | 936 /* 0.5 / cos(pi*(2*i+1)/36) */ |
937 #define C5 FIXR(0.79335334029123516458) | 937 static const int icos36[9] = { |
938 #define C7 FIXR(0.60876142900872063941) | 938 FIXR(0.50190991877167369479), |
939 #define C9 FIXR(0.38268343236508977173) | 939 FIXR(0.51763809020504152469), //0 |
940 #define C11 FIXR(0.13052619222005159154) | 940 FIXR(0.55168895948124587824), |
941 FIXR(0.61038729438072803416), | |
942 FIXR(0.70710678118654752439), //1 | |
943 FIXR(0.87172339781054900991), | |
944 FIXR(1.18310079157624925896), | |
945 FIXR(1.93185165257813657349), //2 | |
946 FIXR(5.73685662283492756461), | |
947 }; | |
941 | 948 |
942 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious | 949 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious |
943 cases. */ | 950 cases. */ |
944 static void imdct12(int *out, int *in) | 951 static void imdct12(int *out, int *in) |
945 { | 952 { |
946 int tmp; | 953 int in0, in1, in2, in3, in4, in5, t1, t2; |
947 int64_t in1_3, in1_9, in4_3, in4_9; | 954 in0= in[0*3]<<5; |
948 | 955 in1= (in[1*3] + in[0*3])<<5; |
949 in1_3 = MUL64(in[1], C3); | 956 in2= (in[2*3] + in[1*3])<<5; |
950 in1_9 = MUL64(in[1], C9); | 957 in3= (in[3*3] + in[2*3])<<5; |
951 in4_3 = MUL64(in[4], C3); | 958 in4= (in[4*3] + in[3*3])<<5; |
952 in4_9 = MUL64(in[4], C9); | 959 in5= (in[5*3] + in[4*3])<<5; |
953 | 960 in5 += in3; |
954 tmp = FRAC_RND(MUL64(in[0], C7) - in1_3 - MUL64(in[2], C11) + | 961 in3 += in1; |
955 MUL64(in[3], C1) - in4_9 - MUL64(in[5], C5)); | 962 |
956 out[0] = tmp; | 963 in2= MULH(2*in2, C3); |
957 out[5] = -tmp; | 964 in3= MULH(2*in3, C3); |
958 tmp = FRAC_RND(MUL64(in[0] - in[3], C9) - in1_3 + | 965 |
959 MUL64(in[2] + in[5], C3) - in4_9); | 966 t1 = in0 - in4; |
960 out[1] = tmp; | 967 t2 = MULL(in1 - in5, icos36[4]); |
961 out[4] = -tmp; | 968 |
962 tmp = FRAC_RND(MUL64(in[0], C11) - in1_9 + MUL64(in[2], C7) - | 969 out[ 7]= |
963 MUL64(in[3], C5) + in4_3 - MUL64(in[5], C1)); | 970 out[10]= t1 + t2; |
964 out[2] = tmp; | 971 out[ 1]= |
965 out[3] = -tmp; | 972 out[ 4]= t1 - t2; |
966 tmp = FRAC_RND(MUL64(-in[0], C5) + in1_9 + MUL64(in[2], C1) + | 973 |
967 MUL64(in[3], C11) - in4_3 - MUL64(in[5], C7)); | 974 in0 += in4>>1; |
968 out[6] = tmp; | 975 in4 = in0 + in2; |
969 out[11] = tmp; | 976 in1 += in5>>1; |
970 tmp = FRAC_RND(MUL64(-in[0] + in[3], C3) - in1_9 + | 977 in5 = MULL(in1 + in3, icos36[1]); |
971 MUL64(in[2] + in[5], C9) + in4_3); | 978 out[ 8]= |
972 out[7] = tmp; | 979 out[ 9]= in4 + in5; |
973 out[10] = tmp; | 980 out[ 2]= |
974 tmp = FRAC_RND(-MUL64(in[0], C1) - in1_3 - MUL64(in[2], C5) - | 981 out[ 3]= in4 - in5; |
975 MUL64(in[3], C7) - in4_9 - MUL64(in[5], C11)); | 982 |
976 out[8] = tmp; | 983 in0 -= in2; |
977 out[9] = tmp; | 984 in1 = MULL(in1 - in3, icos36[7]); |
978 } | 985 out[ 0]= |
979 | 986 out[ 5]= in0 - in1; |
980 #undef C1 | 987 out[ 6]= |
981 #undef C3 | 988 out[11]= in0 + in1; |
982 #undef C5 | 989 } |
983 #undef C7 | |
984 #undef C9 | |
985 #undef C11 | |
986 | 990 |
987 /* cos(pi*i/18) */ | 991 /* cos(pi*i/18) */ |
988 #define C1 FIXHR(0.98480775301220805936/2) | 992 #define C1 FIXHR(0.98480775301220805936/2) |
989 #define C2 FIXHR(0.93969262078590838405/2) | 993 #define C2 FIXHR(0.93969262078590838405/2) |
990 #define C3 FIXHR(0.86602540378443864676/2) | 994 #define C3 FIXHR(0.86602540378443864676/2) |
993 #define C6 FIXHR(0.5/2) | 997 #define C6 FIXHR(0.5/2) |
994 #define C7 FIXHR(0.34202014332566873304/2) | 998 #define C7 FIXHR(0.34202014332566873304/2) |
995 #define C8 FIXHR(0.17364817766693034885/2) | 999 #define C8 FIXHR(0.17364817766693034885/2) |
996 | 1000 |
997 | 1001 |
998 /* 0.5 / cos(pi*(2*i+1)/36) */ | |
999 static const int icos36[9] = { | |
1000 FIXR(0.50190991877167369479), | |
1001 FIXR(0.51763809020504152469), | |
1002 FIXR(0.55168895948124587824), | |
1003 FIXR(0.61038729438072803416), | |
1004 FIXR(0.70710678118654752439), | |
1005 FIXR(0.87172339781054900991), | |
1006 FIXR(1.18310079157624925896), | |
1007 FIXR(1.93185165257813657349), | |
1008 FIXR(5.73685662283492756461), | |
1009 }; | |
1010 /* using Lee like decomposition followed by hand coded 9 points DCT */ | 1002 /* using Lee like decomposition followed by hand coded 9 points DCT */ |
1011 static void imdct36(int *out, int *buf, int *in, int *win) | 1003 static void imdct36(int *out, int *buf, int *in, int *win) |
1012 { | 1004 { |
1013 int i, j, t0, t1, t2, t3, s0, s1, s2, s3; | 1005 int i, j, t0, t1, t2, t3, s0, s1, s2, s3; |
1014 int tmp[18], *tmp1, *in1; | 1006 int tmp[18], *tmp1, *in1; |
1090 s1 = MULL(t3 + t2, icos36[j]); | 1082 s1 = MULL(t3 + t2, icos36[j]); |
1091 s3 = MULL(t3 - t2, icos36[8 - j]); | 1083 s3 = MULL(t3 - t2, icos36[8 - j]); |
1092 | 1084 |
1093 t0 = (s0 + s1) << 5; | 1085 t0 = (s0 + s1) << 5; |
1094 t1 = (s0 - s1) << 5; | 1086 t1 = (s0 - s1) << 5; |
1095 out[(9 + j)*SBLIMIT] = -MULH(t1, win[9 + j]) + buf[9 + j]; | 1087 out[(9 + j)*SBLIMIT] = MULH(t1, win[9 + j]) + buf[9 + j]; |
1096 out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j]; | 1088 out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j]; |
1097 buf[9 + j] = MULH(t0, win[18 + 9 + j]); | 1089 buf[9 + j] = MULH(t0, win[18 + 9 + j]); |
1098 buf[8 - j] = MULH(t0, win[18 + 8 - j]); | 1090 buf[8 - j] = MULH(t0, win[18 + 8 - j]); |
1099 | 1091 |
1100 t0 = (s2 + s3) << 5; | 1092 t0 = (s2 + s3) << 5; |
1101 t1 = (s2 - s3) << 5; | 1093 t1 = (s2 - s3) << 5; |
1102 out[(9 + 8 - j)*SBLIMIT] = -MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j]; | 1094 out[(9 + 8 - j)*SBLIMIT] = MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j]; |
1103 out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j]; | 1095 out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j]; |
1104 buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]); | 1096 buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]); |
1105 buf[ + j] = MULH(t0, win[18 + j]); | 1097 buf[ + j] = MULH(t0, win[18 + j]); |
1106 i += 4; | 1098 i += 4; |
1107 } | 1099 } |
1108 | 1100 |
1109 s0 = tmp[16]; | 1101 s0 = tmp[16]; |
1110 s1 = MULL(tmp[17], icos36[4]); | 1102 s1 = MULL(tmp[17], icos36[4]); |
1111 t0 = (s0 + s1) << 5; | 1103 t0 = (s0 + s1) << 5; |
1112 t1 = (s0 - s1) << 5; | 1104 t1 = (s0 - s1) << 5; |
1113 out[(9 + 4)*SBLIMIT] = -MULH(t1, win[9 + 4]) + buf[9 + 4]; | 1105 out[(9 + 4)*SBLIMIT] = MULH(t1, win[9 + 4]) + buf[9 + 4]; |
1114 out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4]; | 1106 out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4]; |
1115 buf[9 + 4] = MULH(t0, win[18 + 9 + 4]); | 1107 buf[9 + 4] = MULH(t0, win[18 + 9 + 4]); |
1116 buf[8 - 4] = MULH(t0, win[18 + 8 - 4]); | 1108 buf[8 - 4] = MULH(t0, win[18 + 8 - 4]); |
1117 } | 1109 } |
1118 | 1110 |
1989 static void compute_imdct(MPADecodeContext *s, | 1981 static void compute_imdct(MPADecodeContext *s, |
1990 GranuleDef *g, | 1982 GranuleDef *g, |
1991 int32_t *sb_samples, | 1983 int32_t *sb_samples, |
1992 int32_t *mdct_buf) | 1984 int32_t *mdct_buf) |
1993 { | 1985 { |
1994 int32_t *ptr, *win, *win1, *buf, *buf2, *out_ptr, *ptr1; | 1986 int32_t *ptr, *win, *win1, *buf, *out_ptr, *ptr1; |
1995 int32_t in[6]; | |
1996 int32_t out[36]; | |
1997 int32_t out2[12]; | 1987 int32_t out2[12]; |
1998 int i, j, k, mdct_long_end, v, sblimit; | 1988 int i, j, mdct_long_end, v, sblimit; |
1999 | 1989 |
2000 /* find last non zero block */ | 1990 /* find last non zero block */ |
2001 ptr = g->sb_hybrid + 576; | 1991 ptr = g->sb_hybrid + 576; |
2002 ptr1 = g->sb_hybrid + 2 * 18; | 1992 ptr1 = g->sb_hybrid + 2 * 18; |
2003 while (ptr >= ptr1) { | 1993 while (ptr >= ptr1) { |
2034 out_ptr += 18*SBLIMIT; | 2024 out_ptr += 18*SBLIMIT; |
2035 ptr += 18; | 2025 ptr += 18; |
2036 buf += 18; | 2026 buf += 18; |
2037 } | 2027 } |
2038 for(j=mdct_long_end;j<sblimit;j++) { | 2028 for(j=mdct_long_end;j<sblimit;j++) { |
2039 for(i=0;i<6;i++) { | |
2040 out[i] = 0; | |
2041 out[6 + i] = 0; | |
2042 out[30+i] = 0; | |
2043 } | |
2044 /* select frequency inversion */ | 2029 /* select frequency inversion */ |
2045 win = mdct_win[2] + ((4 * 36) & -(j & 1)); | 2030 win = mdct_win[2] + ((4 * 36) & -(j & 1)); |
2046 buf2 = out + 6; | |
2047 for(k=0;k<3;k++) { | |
2048 /* reorder input for short mdct */ | |
2049 ptr1 = ptr + k; | |
2050 for(i=0;i<6;i++) { | |
2051 in[i] = *ptr1; | |
2052 ptr1 += 3; | |
2053 } | |
2054 imdct12(out2, in); | |
2055 /* apply 12 point window and do small overlap */ | |
2056 for(i=0;i<6;i++) { | |
2057 buf2[i] = MULL(out2[i], win[i]) + buf2[i]; | |
2058 buf2[i + 6] = MULL(out2[i + 6], win[i + 6]); | |
2059 } | |
2060 buf2 += 6; | |
2061 } | |
2062 /* overlap */ | |
2063 out_ptr = sb_samples + j; | 2031 out_ptr = sb_samples + j; |
2064 for(i=0;i<18;i++) { | 2032 |
2065 *out_ptr = out[i] + buf[i]; | 2033 for(i=0; i<6; i++){ |
2066 buf[i] = out[i + 18]; | 2034 *out_ptr = buf[i]; |
2067 out_ptr += SBLIMIT; | 2035 out_ptr += SBLIMIT; |
2036 } | |
2037 imdct12(out2, ptr + 0); | |
2038 for(i=0;i<6;i++) { | |
2039 *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1]; | |
2040 buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]); | |
2041 out_ptr += SBLIMIT; | |
2042 } | |
2043 imdct12(out2, ptr + 1); | |
2044 for(i=0;i<6;i++) { | |
2045 *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2]; | |
2046 buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]); | |
2047 out_ptr += SBLIMIT; | |
2048 } | |
2049 imdct12(out2, ptr + 2); | |
2050 for(i=0;i<6;i++) { | |
2051 buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0]; | |
2052 buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]); | |
2053 buf[i + 6*2] = 0; | |
2068 } | 2054 } |
2069 ptr += 18; | 2055 ptr += 18; |
2070 buf += 18; | 2056 buf += 18; |
2071 } | 2057 } |
2072 /* zero bands */ | 2058 /* zero bands */ |