Mercurial > libavcodec.hg
comparison snow.c @ 10842:77dbd6ab2db9 libavcodec
Optimize ff_snow_horizontal_compose97i.
This makes the 9/7 C wavelet at the decoder side 22% faster.
The old code is changed to match the new in terms of the order of operations
(which also makes it slightly faster).
author | michael |
---|---|
date | Mon, 11 Jan 2010 02:52:50 +0000 |
parents | 8f370ebde166 |
children | b57409c0c286 |
comparison
equal
deleted
inserted
replaced
10841:8f370ebde166 | 10842:77dbd6ab2db9 |
---|---|
1118 | 1118 |
1119 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ | 1119 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
1120 IDWTELEM temp[width]; | 1120 IDWTELEM temp[width]; |
1121 const int w2= (width+1)>>1; | 1121 const int w2= (width+1)>>1; |
1122 | 1122 |
1123 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); | 1123 #if 0 //maybe more understandable but slower |
1124 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); | 1124 inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
1125 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); | 1125 inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1); |
1126 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); | 1126 |
1127 inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1); | |
1128 inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0); | |
1129 #else | |
1130 int x; | |
1131 temp[0] = b[0] - ((3*b[w2]+2)>>2); | |
1132 for(x=1; x<(width>>1); x++){ | |
1133 temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3); | |
1134 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x]; | |
1135 } | |
1136 if(width&1){ | |
1137 temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2); | |
1138 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x]; | |
1139 }else | |
1140 temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2]; | |
1141 | |
1142 b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3); | |
1143 for(x=2; x<width-1; x+=2){ | |
1144 b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4); | |
1145 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1); | |
1146 } | |
1147 if(width&1){ | |
1148 b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3); | |
1149 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1); | |
1150 }else | |
1151 b[x-1] = temp[x-1] + 3*b [x-2]; | |
1152 #endif | |
1127 } | 1153 } |
1128 | 1154 |
1129 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ | 1155 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
1130 int i; | 1156 int i; |
1131 | 1157 |