comparison motion_est.c @ 11573:a734f92c94b4 libavcodec

Slice, dice, inline and outline cmp(); motion_est.o is now less than half its previous size. No measurable speed change.
author michael
date Fri, 02 Apr 2010 01:07:03 +0000
parents 31033caa5344
children 7b86b341edd0
comparison
equal deleted inserted replaced
11572:e00f9288527a 11573:a734f92c94b4
105 } 105 }
106 106
107 /*! \brief compares a block (either a full macroblock or a partition thereof) 107 /*! \brief compares a block (either a full macroblock or a partition thereof)
108 against a proposed motion-compensated prediction of that block 108 against a proposed motion-compensated prediction of that block
109 */ 109 */
110 static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, 110 static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
111 const int size, const int h, int ref_index, int src_index, 111 const int size, const int h, int ref_index, int src_index,
112 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){ 112 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
113 MotionEstContext * const c= &s->me; 113 MotionEstContext * const c= &s->me;
114 const int stride= c->stride; 114 const int stride= c->stride;
115 const int uvstride= c->uvstride;
116 const int qpel= flags&FLAG_QPEL;
117 const int chroma= flags&FLAG_CHROMA;
118 const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
119 const int hx= subx + (x<<(1+qpel)); 115 const int hx= subx + (x<<(1+qpel));
120 const int hy= suby + (y<<(1+qpel)); 116 const int hy= suby + (y<<(1+qpel));
121 uint8_t * const * const ref= c->ref[ref_index]; 117 uint8_t * const * const ref= c->ref[ref_index];
122 uint8_t * const * const src= c->src[src_index]; 118 uint8_t * const * const src= c->src[src_index];
123 int d; 119 int d;
124 //FIXME check chroma 4mv, (no crashes ...) 120 //FIXME check chroma 4mv, (no crashes ...)
125 if(flags&FLAG_DIRECT){
126 assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)); 121 assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
127 if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){ 122 if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
128 const int time_pp= s->pp_time; 123 const int time_pp= s->pp_time;
129 const int time_pb= s->pb_time; 124 const int time_pb= s->pb_time;
130 const int mask= 2*qpel+1; 125 const int mask= 2*qpel+1;
179 } 174 }
180 } 175 }
181 d = cmp_func(s, c->temp, src[0], stride, 16); 176 d = cmp_func(s, c->temp, src[0], stride, 16);
182 }else 177 }else
183 d= 256*256*256*32; 178 d= 256*256*256*32;
184 }else{ 179 return d;
180 }
181
182 static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
183 const int size, const int h, int ref_index, int src_index,
184 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
185 MotionEstContext * const c= &s->me;
186 const int stride= c->stride;
187 const int uvstride= c->uvstride;
188 const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
189 const int hx= subx + (x<<(1+qpel));
190 const int hy= suby + (y<<(1+qpel));
191 uint8_t * const * const ref= c->ref[ref_index];
192 uint8_t * const * const src= c->src[src_index];
193 int d;
194 //FIXME check chroma 4mv, (no crashes ...)
185 int uvdxy; /* no, it might not be used uninitialized */ 195 int uvdxy; /* no, it might not be used uninitialized */
186 if(dxy){ 196 if(dxy){
187 if(qpel){ 197 if(qpel){
188 c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h) 198 c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
189 if(chroma){ 199 if(chroma){
210 c->hpel_put[size+1][uvdxy](uvtemp , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1); 220 c->hpel_put[size+1][uvdxy](uvtemp , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
211 c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1); 221 c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
212 d += chroma_cmp_func(s, uvtemp , src[1], uvstride, h>>1); 222 d += chroma_cmp_func(s, uvtemp , src[1], uvstride, h>>1);
213 d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1); 223 d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
214 } 224 }
215 }
216 #if 0
217 if(full_pel){
218 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
219 score_map[index]= d;
220 }
221
222 d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
223 #endif
224 return d; 225 return d;
226 }
227
228 static int cmp_simple(MpegEncContext *s, const int x, const int y,
229 int ref_index, int src_index,
230 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
231 return cmp_inline(s,x,y,0,0,0,16,ref_index,src_index, cmp_func, chroma_cmp_func, 0, 0);
232 }
233
234 static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
235 const int size, const int h, int ref_index, int src_index,
236 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
237 if(flags&FLAG_DIRECT){
238 return cmp_direct_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
239 }else{
240 return cmp_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
241 }
242 }
243
244 static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
245 const int size, const int h, int ref_index, int src_index,
246 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
247 if(flags&FLAG_DIRECT){
248 return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
249 }else{
250 return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);
251 }
252 }
253
254 static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
255 const int size, const int h, int ref_index, int src_index,
256 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
257 if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
258 && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
259 && flags==0 && h==16 && size==0 && subx==0 && suby==0){
260 return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
261 }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
262 && subx==0 && suby==0){
263 return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
264 }else{
265 return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
266 }
267 }
268
269 static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
270 const int size, const int h, int ref_index, int src_index,
271 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
272 if(flags&FLAG_DIRECT){
273 return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0);
274 }else{
275 return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
276 }
277 }
278
279 static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
280 const int size, const int h, int ref_index, int src_index,
281 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
282 if(flags&FLAG_DIRECT){
283 return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1);
284 }else{
285 return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1, flags&FLAG_CHROMA);
286 }
225 } 287 }
226 288
227 #include "motion_est_template.c" 289 #include "motion_est_template.c"
228 290
229 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ 291 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){