Mercurial > libavcodec.hg
comparison snow.c @ 5648:cd26ab6e3953 libavcodec
Clean up mc_block().
Perform the interpolation steps in an order such that half-pel interpolation
could be done per picture.
This also makes mc_block() match H.264 for the 1/4-pel cases, so that using
the h264 functions for some cases does not introduce a fantastic mess.
author | michael |
---|---|
date | Sat, 08 Sep 2007 03:14:20 +0000 |
parents | 473cada682a1 |
children | 9fe214a99139 |
comparison
equal
deleted
inserted
replaced
5647:7c139ea9065e | 5648:cd26ab6e3953 |
---|---|
2142 } | 2142 } |
2143 } | 2143 } |
2144 } | 2144 } |
2145 | 2145 |
2146 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ | 2146 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
2147 int x, y; | 2147 const static uint8_t weight[64]={ |
2148 8,7,6,5,4,3,2,1, | |
2149 7,7,0,0,0,0,0,1, | |
2150 6,0,6,0,0,0,2,0, | |
2151 5,0,0,5,0,3,0,0, | |
2152 4,0,0,0,4,0,0,0, | |
2153 3,0,0,5,0,3,0,0, | |
2154 2,0,6,0,0,0,2,0, | |
2155 1,7,0,0,0,0,0,1, | |
2156 }; | |
2157 | |
2158 const static uint8_t brane[256]={ | |
2159 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, | |
2160 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2161 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2162 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2163 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2164 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2165 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2166 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2167 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2168 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2169 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2170 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2171 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2172 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2173 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2174 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2175 }; | |
2176 | |
2177 const static uint8_t needs[16]={ | |
2178 0,1,0,0, | |
2179 2,4,2,0, | |
2180 0,1,0,0, | |
2181 15 | |
2182 }; | |
2183 | |
2184 int x, y, b, r, l; | |
2185 int16_t tmpIt [64*(32+HTAPS)]; | |
2186 uint8_t tmp2t[3][stride*(32+HTAPS)]; | |
2187 int16_t *tmpI= tmpIt; | |
2188 uint8_t *tmp2= tmp2t[0]; | |
2189 uint8_t *hpel[11]; | |
2148 START_TIMER | 2190 START_TIMER |
2191 assert(dx<16 && dy<16); | |
2192 r= brane[dx + 16*dy]&15; | |
2193 l= brane[dx + 16*dy]>>4; | |
2194 | |
2195 b= needs[l] | needs[r]; | |
2196 | |
2197 if(b&5){ | |
2149 for(y=0; y < b_h+HTAPS-1; y++){ | 2198 for(y=0; y < b_h+HTAPS-1; y++){ |
2150 for(x=0; x < b_w; x++){ | 2199 for(x=0; x < b_w; x++){ |
2151 int a_2=src[x + HTAPS/2-5]; | 2200 int a_2=src[x + HTAPS/2-5]; |
2152 int a_1=src[x + HTAPS/2-4]; | 2201 int a_1=src[x + HTAPS/2-4]; |
2153 int a0= src[x + HTAPS/2-3]; | 2202 int a0= src[x + HTAPS/2-3]; |
2168 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | 2217 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; |
2169 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; | 2218 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; |
2170 | 2219 |
2171 // if(b_w==16) am= 8*(a1+a2); | 2220 // if(b_w==16) am= 8*(a1+a2); |
2172 | 2221 |
2173 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8; | 2222 tmpI[x]= am; |
2174 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8; | 2223 am= (am+16)>>5; |
2175 | |
2176 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/ | |
2177 if(am&(~255)) am= ~(am>>31); | 2224 if(am&(~255)) am= ~(am>>31); |
2178 | 2225 tmp2[x]= am; |
2179 tmp[x] = am; | 2226 } |
2180 | 2227 tmpI+= 64; |
2181 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6; | 2228 tmp2+= stride; |
2182 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6; | |
2183 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6; | |
2184 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/ | |
2185 } | |
2186 tmp += stride; | |
2187 src += stride; | 2229 src += stride; |
2188 } | 2230 } |
2189 tmp -= (b_h+HTAPS-1)*stride; | 2231 src -= stride*y; |
2190 | 2232 } |
2233 src += HTAPS/2 - 1; | |
2234 tmp2= tmp2t[1]; | |
2235 | |
2236 if(b&2){ | |
2191 for(y=0; y < b_h; y++){ | 2237 for(y=0; y < b_h; y++){ |
2192 for(x=0; x < b_w; x++){ | 2238 for(x=0; x < b_w+1; x++){ |
2193 int a_2=tmp[x + (HTAPS/2-5)*stride]; | 2239 int a_2=src[x + (HTAPS/2-5)*stride]; |
2194 int a_1=tmp[x + (HTAPS/2-4)*stride]; | 2240 int a_1=src[x + (HTAPS/2-4)*stride]; |
2195 int a0= tmp[x + (HTAPS/2-3)*stride]; | 2241 int a0= src[x + (HTAPS/2-3)*stride]; |
2196 int a1= tmp[x + (HTAPS/2-2)*stride]; | 2242 int a1= src[x + (HTAPS/2-2)*stride]; |
2197 int a2= tmp[x + (HTAPS/2-1)*stride]; | 2243 int a2= src[x + (HTAPS/2-1)*stride]; |
2198 int a3= tmp[x + (HTAPS/2+0)*stride]; | 2244 int a3= src[x + (HTAPS/2+0)*stride]; |
2199 int a4= tmp[x + (HTAPS/2+1)*stride]; | 2245 int a4= src[x + (HTAPS/2+1)*stride]; |
2200 int a5= tmp[x + (HTAPS/2+2)*stride]; | 2246 int a5= src[x + (HTAPS/2+2)*stride]; |
2201 int a6= tmp[x + (HTAPS/2+3)*stride]; | 2247 int a6= src[x + (HTAPS/2+3)*stride]; |
2202 int a7= tmp[x + (HTAPS/2+4)*stride]; | 2248 int a7= src[x + (HTAPS/2+4)*stride]; |
2203 #if HTAPS==6 | 2249 #if HTAPS==6 |
2204 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | 2250 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
2205 #else | 2251 #else |
2206 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); | 2252 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); |
2207 #endif | 2253 #endif |
2209 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | 2255 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; |
2210 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ | 2256 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ |
2211 | 2257 |
2212 // if(b_w==16) am= 8*(a1+a2); | 2258 // if(b_w==16) am= 8*(a1+a2); |
2213 | 2259 |
2214 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8; | 2260 am= (am + 16)>>5; |
2215 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8; | |
2216 | |
2217 if(am&(~255)) am= ~(am>>31); | 2261 if(am&(~255)) am= ~(am>>31); |
2218 | 2262 tmp2[x]= am; |
2219 dst[x] = am; | 2263 } |
2220 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6; | 2264 src += stride; |
2221 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6; | 2265 tmp2+= stride; |
2222 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6; | 2266 } |
2223 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/ | 2267 src -= stride*y; |
2224 } | 2268 } |
2225 dst += stride; | 2269 src += stride*(HTAPS/2 - 1); |
2226 tmp += stride; | 2270 tmp2= tmp2t[2]; |
2271 tmpI= tmpIt; | |
2272 if(b&4){ | |
2273 for(y=0; y < b_h; y++){ | |
2274 for(x=0; x < b_w; x++){ | |
2275 int a_2=tmpI[x + (HTAPS/2-5)*64]; | |
2276 int a_1=tmpI[x + (HTAPS/2-4)*64]; | |
2277 int a0= tmpI[x + (HTAPS/2-3)*64]; | |
2278 int a1= tmpI[x + (HTAPS/2-2)*64]; | |
2279 int a2= tmpI[x + (HTAPS/2-1)*64]; | |
2280 int a3= tmpI[x + (HTAPS/2+0)*64]; | |
2281 int a4= tmpI[x + (HTAPS/2+1)*64]; | |
2282 int a5= tmpI[x + (HTAPS/2+2)*64]; | |
2283 int a6= tmpI[x + (HTAPS/2+3)*64]; | |
2284 int a7= tmpI[x + (HTAPS/2+4)*64]; | |
2285 #if HTAPS==6 | |
2286 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | |
2287 #else | |
2288 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); | |
2289 #endif | |
2290 am= (am + 512)>>10; | |
2291 if(am&(~255)) am= ~(am>>31); | |
2292 tmp2[x]= am; | |
2293 } | |
2294 tmpI+= 64; | |
2295 tmp2+= stride; | |
2296 } | |
2297 } | |
2298 | |
2299 hpel[ 0]= src; | |
2300 hpel[ 1]= tmp2t[0] + stride*(HTAPS/2-1); | |
2301 hpel[ 2]= src + 1; | |
2302 | |
2303 hpel[ 4]= tmp2t[1]; | |
2304 hpel[ 5]= tmp2t[2]; | |
2305 hpel[ 6]= tmp2t[1] + 1; | |
2306 | |
2307 hpel[ 8]= src + stride; | |
2308 hpel[ 9]= hpel[1] + stride; | |
2309 hpel[10]= hpel[8] + 1; | |
2310 | |
2311 if(b==15){ | |
2312 uint8_t *src1= hpel[dx/8 + dy/8*4 ]; | |
2313 uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2314 uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2315 uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
2316 dx&=7; | |
2317 dy&=7; | |
2318 for(y=0; y < b_h; y++){ | |
2319 for(x=0; x < b_w; x++){ | |
2320 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2321 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2322 } | |
2323 src1+=stride; | |
2324 src2+=stride; | |
2325 src3+=stride; | |
2326 src4+=stride; | |
2327 dst +=stride; | |
2328 } | |
2329 }else{ | |
2330 uint8_t *src1= hpel[l]; | |
2331 uint8_t *src2= hpel[r]; | |
2332 int a= weight[((dx&7) + (8*(dy&7)))]; | |
2333 int b= 8-a; | |
2334 for(y=0; y < b_h; y++){ | |
2335 for(x=0; x < b_w; x++){ | |
2336 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2337 } | |
2338 src1+=stride; | |
2339 src2+=stride; | |
2340 dst +=stride; | |
2341 } | |
2227 } | 2342 } |
2228 STOP_TIMER("mc_block") | 2343 STOP_TIMER("mc_block") |
2229 } | 2344 } |
2230 | 2345 |
2231 #define mca(dx,dy,b_w)\ | 2346 #define mca(dx,dy,b_w)\ |