comparison snow.c @ 10842:77dbd6ab2db9 libavcodec

Optimize ff_snow_horizontal_compose97i. This makes the 9/7 C wavelet at the decoder side 22% faster. The old code is changed to match the new in terms of the order of operations (which also makes it slightly faster).
author michael
date Mon, 11 Jan 2010 02:52:50 +0000
parents 8f370ebde166
children b57409c0c286
comparison
equal deleted inserted replaced
10841:8f370ebde166 10842:77dbd6ab2db9
1118 1118
1119 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ 1119 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1120 IDWTELEM temp[width]; 1120 IDWTELEM temp[width];
1121 const int w2= (width+1)>>1; 1121 const int w2= (width+1)>>1;
1122 1122
1123 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); 1123 #if 0 //maybe more understandable but slower
1124 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); 1124 inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1125 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); 1125 inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
1126 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); 1126
1127 inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
1128 inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
1129 #else
1130 int x;
1131 temp[0] = b[0] - ((3*b[w2]+2)>>2);
1132 for(x=1; x<(width>>1); x++){
1133 temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
1134 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1135 }
1136 if(width&1){
1137 temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
1138 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1139 }else
1140 temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
1141
1142 b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
1143 for(x=2; x<width-1; x+=2){
1144 b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
1145 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1146 }
1147 if(width&1){
1148 b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
1149 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1150 }else
1151 b[x-1] = temp[x-1] + 3*b [x-2];
1152 #endif
1127 } 1153 }
1128 1154
1129 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ 1155 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1130 int i; 1156 int i;
1131 1157