comparison snow.c @ 5634:9960732c7d7b libavcodec

Replace hard-coded constants with HTAPS (the number of half-pel interpolation filter taps) where appropriate.
author michael
date Wed, 05 Sep 2007 00:49:09 +0000
parents 873ea64637d9
children 473cada682a1
comparison
equal deleted inserted replaced
5633:873ea64637d9 5634:9960732c7d7b
392 }; 392 };
393 393
394 #define LOG2_MB_SIZE 4 394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE) 395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4 396 #define ENCODER_EXTRA_BITS 4
397 #define HTAPS 6
397 398
398 typedef struct x_and_coeff{ 399 typedef struct x_and_coeff{
399 int16_t x; 400 int16_t x;
400 uint16_t coeff; 401 uint16_t coeff;
401 } x_and_coeff; 402 } x_and_coeff;
2143 } 2144 }
2144 2145
2145 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ 2146 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2146 int x, y; 2147 int x, y;
2147 START_TIMER 2148 START_TIMER
2148 for(y=0; y < b_h+5; y++){ 2149 for(y=0; y < b_h+HTAPS-1; y++){
2149 for(x=0; x < b_w; x++){ 2150 for(x=0; x < b_w; x++){
2150 int a0= src[x ]; 2151 int a_2=src[x + HTAPS/2-5];
2151 int a1= src[x + 1]; 2152 int a_1=src[x + HTAPS/2-4];
2152 int a2= src[x + 2]; 2153 int a0= src[x + HTAPS/2-3];
2153 int a3= src[x + 3]; 2154 int a1= src[x + HTAPS/2-2];
2154 int a4= src[x + 4]; 2155 int a2= src[x + HTAPS/2-1];
2155 int a5= src[x + 5]; 2156 int a3= src[x + HTAPS/2+0];
2157 int a4= src[x + HTAPS/2+1];
2158 int a5= src[x + HTAPS/2+2];
2159 int a6= src[x + HTAPS/2+3];
2160 int a7= src[x + HTAPS/2+4];
2156 // int am= 9*(a1+a2) - (a0+a3); 2161 // int am= 9*(a1+a2) - (a0+a3);
2157 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); 2162 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2158 // int am= 18*(a2+a3) - 2*(a1+a4); 2163 // int am= 18*(a2+a3) - 2*(a1+a4);
2159 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; 2164 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2160 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; 2165 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2175 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/ 2180 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2176 } 2181 }
2177 tmp += stride; 2182 tmp += stride;
2178 src += stride; 2183 src += stride;
2179 } 2184 }
2180 tmp -= (b_h+5)*stride; 2185 tmp -= (b_h+HTAPS-1)*stride;
2181 2186
2182 for(y=0; y < b_h; y++){ 2187 for(y=0; y < b_h; y++){
2183 for(x=0; x < b_w; x++){ 2188 for(x=0; x < b_w; x++){
2184 int a0= tmp[x + 0*stride]; 2189 int a_2=tmp[x + (HTAPS/2-5)*stride];
2185 int a1= tmp[x + 1*stride]; 2190 int a_1=tmp[x + (HTAPS/2-4)*stride];
2186 int a2= tmp[x + 2*stride]; 2191 int a0= tmp[x + (HTAPS/2-3)*stride];
2187 int a3= tmp[x + 3*stride]; 2192 int a1= tmp[x + (HTAPS/2-2)*stride];
2188 int a4= tmp[x + 4*stride]; 2193 int a2= tmp[x + (HTAPS/2-1)*stride];
2189 int a5= tmp[x + 5*stride]; 2194 int a3= tmp[x + (HTAPS/2+0)*stride];
2195 int a4= tmp[x + (HTAPS/2+1)*stride];
2196 int a5= tmp[x + (HTAPS/2+2)*stride];
2197 int a6= tmp[x + (HTAPS/2+3)*stride];
2198 int a7= tmp[x + (HTAPS/2+4)*stride];
2190 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); 2199 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2191 // int am= 18*(a2+a3) - 2*(a1+a4); 2200 // int am= 18*(a2+a3) - 2*(a1+a4);
2192 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; 2201 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2193 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ 2202 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2194 2203
2211 STOP_TIMER("mc_block") 2220 STOP_TIMER("mc_block")
2212 } 2221 }
2213 2222
2214 #define mca(dx,dy,b_w)\ 2223 #define mca(dx,dy,b_w)\
2215 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ 2224 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2216 uint8_t tmp[stride*(b_w+5)];\ 2225 uint8_t tmp[stride*(b_w+HTAPS-1)];\
2217 assert(h==b_w);\ 2226 assert(h==b_w);\
2218 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\ 2227 mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2219 } 2228 }
2220 2229
2221 mca( 0, 0,16) 2230 mca( 0, 0,16)
2222 mca( 8, 0,16) 2231 mca( 8, 0,16)
2223 mca( 0, 8,16) 2232 mca( 0, 8,16)
2272 int mx= block->mx*scale; 2281 int mx= block->mx*scale;
2273 int my= block->my*scale; 2282 int my= block->my*scale;
2274 const int dx= mx&15; 2283 const int dx= mx&15;
2275 const int dy= my&15; 2284 const int dy= my&15;
2276 const int tab_index= 3 - (b_w>>2) + (b_w>>4); 2285 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2277 sx += (mx>>4) - 2; 2286 sx += (mx>>4) - (HTAPS/2-1);
2278 sy += (my>>4) - 2; 2287 sy += (my>>4) - (HTAPS/2-1);
2279 src += sx + sy*stride; 2288 src += sx + sy*stride;
2280 if( (unsigned)sx >= w - b_w - 4 2289 if( (unsigned)sx >= w - b_w - (HTAPS-2)
2281 || (unsigned)sy >= h - b_h - 4){ 2290 || (unsigned)sy >= h - b_h - (HTAPS-2)){
2282 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); 2291 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS-1, b_h+HTAPS-1, sx, sy, w, h);
2283 src= tmp + MB_SIZE; 2292 src= tmp + MB_SIZE;
2284 } 2293 }
2285 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); 2294 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2286 // assert(!(b_w&(b_w-1))); 2295 // assert(!(b_w&(b_w-1)));
2287 assert(b_w>1 && b_h>1); 2296 assert(b_w>1 && b_h>1);
2288 assert(tab_index>=0 && tab_index<4 || b_w==32); 2297 assert(tab_index>=0 && tab_index<4 || b_w==32);
2289 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1))) 2298 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6)
2290 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); 2299 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2291 else if(b_w==32){ 2300 else if(b_w==32){
2292 int y; 2301 int y;
2293 for(y=0; y<b_h; y+=16){ 2302 for(y=0; y<b_h; y+=16){
2294 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride); 2303 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2731 const int ref_stride= s->current_picture.linesize[plane_index]; 2740 const int ref_stride= s->current_picture.linesize[plane_index];
2732 uint8_t *dst= s->current_picture.data[plane_index]; 2741 uint8_t *dst= s->current_picture.data[plane_index];
2733 uint8_t *src= s-> input_picture.data[plane_index]; 2742 uint8_t *src= s-> input_picture.data[plane_index];
2734 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; 2743 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2735 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment 2744 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2736 uint8_t tmp[ref_stride*(2*MB_SIZE+5)]; 2745 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS-1)];
2737 const int b_stride = s->b_width << s->block_max_depth; 2746 const int b_stride = s->b_width << s->block_max_depth;
2738 const int b_height = s->b_height<< s->block_max_depth; 2747 const int b_height = s->b_height<< s->block_max_depth;
2739 const int w= p->width; 2748 const int w= p->width;
2740 const int h= p->height; 2749 const int h= p->height;
2741 int distortion; 2750 int distortion;