comparison motion_est.c @ 1950:a3c60fa850dc libavcodec

Motion estimation cleanup: replace ugly macros with always_inline functions. That way it's much more readable and flexible, as always_inline can simply be removed while the macros couldn't be. About 0.5% speedup with default parameters.
author michael
date Thu, 22 Apr 2004 03:31:29 +0000
parents e2501e6e7ff7
children 5dafb10e0252
comparison
equal deleted inserted replaced
1949:66215baae7b9 1950:a3c60fa850dc
31 #include <limits.h> 31 #include <limits.h>
32 #include "avcodec.h" 32 #include "avcodec.h"
33 #include "dsputil.h" 33 #include "dsputil.h"
34 #include "mpegvideo.h" 34 #include "mpegvideo.h"
35 35
36 //#undef NDEBUG 36 #undef NDEBUG
37 //#include <assert.h> 37 #include <assert.h>
38 38
39 #define SQ(a) ((a)*(a)) 39 #define SQ(a) ((a)*(a))
40 40
41 #define P_LEFT P[1] 41 #define P_LEFT P[1]
42 #define P_TOP P[2] 42 #define P_TOP P[2]
44 #define P_MEDIAN P[4] 44 #define P_MEDIAN P[4]
45 #define P_MV1 P[9] 45 #define P_MV1 P[9]
46 46
47 static inline int sad_hpel_motion_search(MpegEncContext * s, 47 static inline int sad_hpel_motion_search(MpegEncContext * s,
48 int *mx_ptr, int *my_ptr, int dmin, 48 int *mx_ptr, int *my_ptr, int dmin,
49 int pred_x, int pred_y, uint8_t *src_data[3], 49 int src_index, int ref_index,
50 uint8_t *ref_data[6], int stride, int uvstride, 50 int size, int h);
51 int size, int h, uint8_t * const mv_penalty);
52 51
53 static inline int update_map_generation(MpegEncContext * s) 52 static inline int update_map_generation(MpegEncContext * s)
54 { 53 {
55 s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2); 54 s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2);
56 if(s->me.map_generation==0){ 55 if(s->me.map_generation==0){
71 const Minima *da = (const Minima *) a; 70 const Minima *da = (const Minima *) a;
72 const Minima *db = (const Minima *) b; 71 const Minima *db = (const Minima *) b;
73 72
74 return da->height - db->height; 73 return da->height - db->height;
75 } 74 }
76 75
77 /* SIMPLE */ 76 #define FLAG_QPEL 1 //must be 1
78 #define RENAME(a) simple_ ## a 77 #define FLAG_CHROMA 2
79 78 #define FLAG_DIRECT 4
80 #define CMP(d, x, y, size)\ 79
81 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h); 80 static inline void init_ref(MpegEncContext *s, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
82 81 MotionEstContext * const c= &s->me;
83 #define CMP_HPEL(d, dx, dy, x, y, size)\ 82 const int offset[3]= {
84 {\ 83 y*c-> stride + x,
85 const int dxy= (dx) + 2*(dy);\ 84 ((y*c->uvstride + x)>>1),
86 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\ 85 ((y*c->uvstride + x)>>1),
87 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ 86 };
88 } 87 int i;
89 88 for(i=0; i<3; i++){
90 89 c->src[0][i]= src [i] + offset[i];
91 #define CMP_QPEL(d, dx, dy, x, y, size)\ 90 c->ref[0][i]= ref [i] + offset[i];
92 {\ 91 }
93 const int dxy= (dx) + 4*(dy);\ 92 if(ref_index){
94 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ 93 for(i=0; i<3; i++){
95 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ 94 c->ref[ref_index][i]= ref2[i] + offset[i];
95 }
96 }
97 }
98
99 static int get_flags(MpegEncContext *s, int direct, int chroma){
100 return ((s->flags&CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
101 + (direct ? FLAG_DIRECT : 0)
102 + (chroma ? FLAG_CHROMA : 0);
103 }
104
105 static inline void init_mc(MpegEncContext *s, int size, int flags){
106 MotionEstContext * const c= &s->me;
107
108 /*FIXME s->no_rounding b_type*/
109 if(flags & FLAG_CHROMA){
110 if(s->no_rounding) c->chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
111 else c->chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
112 }
113
114 if(flags & FLAG_QPEL){
115 c->qpel_avg= &s->dsp.avg_qpel_pixels_tab[size];
116 if(s->no_rounding) c->qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];
117 else c->qpel_put= &s->dsp.put_qpel_pixels_tab[size];
118 }else{
119 c->hpel_avg= &s->dsp.avg_pixels_tab[size];
120 if(s->no_rounding) c->hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
121 else c->hpel_put= &s->dsp.put_pixels_tab[size];
122 }
123 c->temp= c->scratchpad;
124 }
125
126 static always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
127 const int size, const int h, int ref_index, int src_index,
128 me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
129 MotionEstContext * const c= &s->me;
130 const int stride= c->stride;
131 const int uvstride= c->uvstride;
132 const int qpel= flags&FLAG_QPEL;
133 const int chroma= flags&FLAG_CHROMA;
134 const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
135 const int hx= subx + (x<<(1+qpel));
136 const int hy= suby + (y<<(1+qpel));
137 uint8_t * const * const ref= c->ref[ref_index];
138 uint8_t * const * const src= c->src[src_index];
139 int d;
140 //FIXME check chroma 4mv, (no crashes ...)
141 if(flags&FLAG_DIRECT){
142 if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
143 const int time_pp= s->pp_time;
144 const int time_pb= s->pb_time;
145 const int mask= 2*qpel+1;
146 if(s->mv_type==MV_TYPE_8X8){
147 int i;
148 for(i=0; i<4; i++){
149 int fx = c->direct_basis_mv[i][0] + hx;
150 int fy = c->direct_basis_mv[i][1] + hy;
151 int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
152 int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
153 int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
154 int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
155
156 uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
157 if(qpel){
158 c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
159 c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
160 }else{
161 c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
162 c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
163 }
164 }
165 }else{
166 int fx = c->direct_basis_mv[0][0] + hx;
167 int fy = c->direct_basis_mv[0][1] + hy;
168 int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
169 int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
170 int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
171 int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
172
173 if(qpel){
174 c->qpel_put[1][fxy](c->temp , ref[0] + (fx>>2) + (fy>>2)*stride , stride);
175 c->qpel_put[1][fxy](c->temp + 8 , ref[0] + (fx>>2) + (fy>>2)*stride + 8 , stride);
176 c->qpel_put[1][fxy](c->temp + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8*stride, stride);
177 c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
178 c->qpel_avg[1][bxy](c->temp , ref[8] + (bx>>2) + (by>>2)*stride , stride);
179 c->qpel_avg[1][bxy](c->temp + 8 , ref[8] + (bx>>2) + (by>>2)*stride + 8 , stride);
180 c->qpel_avg[1][bxy](c->temp + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8*stride, stride);
181 c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
182 }else{
183 assert((fx>>1) + 16*s->mb_x >= -16);
184 assert((fy>>1) + 16*s->mb_y >= -16);
185 assert((fx>>1) + 16*s->mb_x <= s->width);
186 assert((fy>>1) + 16*s->mb_y <= s->height);
187 assert((bx>>1) + 16*s->mb_x >= -16);
188 assert((by>>1) + 16*s->mb_y >= -16);
189 assert((bx>>1) + 16*s->mb_x <= s->width);
190 assert((by>>1) + 16*s->mb_y <= s->height);
191
192 c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
193 c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
194 }
195 }
196 d = cmp_func(s, c->temp, src[0], stride, 16);
197 }else
198 d= 256*256*256*32;
199 }else{
200 int uvdxy;
201 if(dxy){
202 if(qpel){
203 c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
204 if(chroma){
205 int cx= hx/2;
206 int cy= hy/2;
207 cx= (cx>>1)|(cx&1);
208 cy= (cy>>1)|(cy&1);
209 uvdxy= (cx&1) + 2*(cy&1);
210 //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
211 }
212 }else{
213 c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
214 if(chroma)
215 uvdxy= dxy | (x&1) | (2*(y&1));
216 }
217 d = cmp_func(s, c->temp, src[0], stride, h);
218 }else{
219 d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
220 if(chroma)
221 uvdxy= (x&1) + 2*(y&1);
222 }
223 if(chroma){
224 uint8_t * const uvtemp= c->temp + 16*stride;
225 c->hpel_put[size+1][uvdxy](uvtemp , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
226 c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
227 d += chroma_cmp_func(s, uvtemp , src[1], uvstride, h>>1);
228 d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
229 }
230 }
231 #if 0
232 if(full_pel){
233 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
234 score_map[index]= d;
235 }
236
237 d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
238 #endif
239 return d;
96 } 240 }
97 241
98 #include "motion_est_template.c" 242 #include "motion_est_template.c"
99 #undef RENAME
100 #undef CMP
101 #undef CMP_HPEL
102 #undef CMP_QPEL
103 #undef INIT
104
105 /* SIMPLE CHROMA */
106 #define RENAME(a) simple_chroma_ ## a
107
108 #define CMP(d, x, y, size)\
109 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\
110 if(chroma_cmp){\
111 int dxy= ((x)&1) + 2*((y)&1);\
112 int c= ((x)>>1) + ((y)>>1)*uvstride;\
113 \
114 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
115 d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\
116 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
117 d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\
118 }
119
120 #define CMP_HPEL(d, dx, dy, x, y, size)\
121 {\
122 const int dxy= (dx) + 2*(dy);\
123 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
124 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
125 if(chroma_cmp_sub){\
126 int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
127 int c= ((x)>>1) + ((y)>>1)*uvstride;\
128 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
129 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
130 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
131 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
132 }\
133 }
134
135 #define CMP_QPEL(d, dx, dy, x, y, size)\
136 {\
137 const int dxy= (dx) + 4*(dy);\
138 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
139 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
140 if(chroma_cmp_sub){\
141 int cxy, c;\
142 int cx= (4*(x) + (dx))/2;\
143 int cy= (4*(y) + (dy))/2;\
144 cx= (cx>>1)|(cx&1);\
145 cy= (cy>>1)|(cy&1);\
146 cxy= (cx&1) + 2*(cy&1);\
147 c= ((cx)>>1) + ((cy)>>1)*uvstride;\
148 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
149 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
150 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
151 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
152 }\
153 }
154
155 #include "motion_est_template.c"
156 #undef RENAME
157 #undef CMP
158 #undef CMP_HPEL
159 #undef CMP_QPEL
160 #undef INIT
161
162 /* SIMPLE DIRECT HPEL */
163 #define RENAME(a) simple_direct_hpel_ ## a
164 //FIXME precalc divisions stuff
165
166 #define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
167 if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
168 const int hx= 2*(x) + (dx);\
169 const int hy= 2*(y) + (dy);\
170 if(s->mv_type==MV_TYPE_8X8){\
171 int i;\
172 for(i=0; i<4; i++){\
173 int fx = s->me.direct_basis_mv[i][0] + hx;\
174 int fy = s->me.direct_basis_mv[i][1] + hy;\
175 int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
176 int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
177 int fxy= (fx&1) + 2*(fy&1);\
178 int bxy= (bx&1) + 2*(by&1);\
179 \
180 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
181 hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
182 hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\
183 }\
184 }else{\
185 int fx = s->me.direct_basis_mv[0][0] + hx;\
186 int fy = s->me.direct_basis_mv[0][1] + hy;\
187 int bx = hx ? fx - s->me.co_located_mv[0][0] : (s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp);\
188 int by = hy ? fy - s->me.co_located_mv[0][1] : (s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp);\
189 int fxy= (fx&1) + 2*(fy&1);\
190 int bxy= (bx&1) + 2*(by&1);\
191 \
192 assert((fx>>1) + 16*s->mb_x >= -16);\
193 assert((fy>>1) + 16*s->mb_y >= -16);\
194 assert((fx>>1) + 16*s->mb_x <= s->width);\
195 assert((fy>>1) + 16*s->mb_y <= s->height);\
196 assert((bx>>1) + 16*s->mb_x >= -16);\
197 assert((by>>1) + 16*s->mb_y >= -16);\
198 assert((bx>>1) + 16*s->mb_x <= s->width);\
199 assert((by>>1) + 16*s->mb_y <= s->height);\
200 \
201 hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
202 hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\
203 }\
204 d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
205 }else\
206 d= 256*256*256*32;
207
208
209 #define CMP_HPEL(d, dx, dy, x, y, size)\
210 CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
211
212 #define CMP(d, x, y, size)\
213 CMP_DIRECT(d, 0, 0, x, y, size, cmp)
214
215 #include "motion_est_template.c"
216 #undef RENAME
217 #undef CMP
218 #undef CMP_HPEL
219 #undef CMP_QPEL
220 #undef INIT
221 #undef CMP_DIRECT
222
223 /* SIMPLE DIRECT QPEL */
224 #define RENAME(a) simple_direct_qpel_ ## a
225
226 #define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
227 if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
228 const int qx= 4*(x) + (dx);\
229 const int qy= 4*(y) + (dy);\
230 if(s->mv_type==MV_TYPE_8X8){\
231 int i;\
232 for(i=0; i<4; i++){\
233 int fx = s->me.direct_basis_mv[i][0] + qx;\
234 int fy = s->me.direct_basis_mv[i][1] + qy;\
235 int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
236 int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
237 int fxy= (fx&3) + 4*(fy&3);\
238 int bxy= (bx&3) + 4*(by&3);\
239 \
240 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
241 qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
242 qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\
243 }\
244 }else{\
245 int fx = s->me.direct_basis_mv[0][0] + qx;\
246 int fy = s->me.direct_basis_mv[0][1] + qy;\
247 int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
248 int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
249 int fxy= (fx&3) + 4*(fy&3);\
250 int bxy= (bx&3) + 4*(by&3);\
251 \
252 qpel_put[1][fxy](s->me.scratchpad , (ref_y ) + (fx>>2) + (fy>>2)*(stride) , stride);\
253 qpel_put[1][fxy](s->me.scratchpad + 8 , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 , stride);\
254 qpel_put[1][fxy](s->me.scratchpad + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8*stride, stride);\
255 qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\
256 qpel_avg[1][bxy](s->me.scratchpad , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) , stride);\
257 qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\
258 qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\
259 qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
260 }\
261 d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
262 }else\
263 d= 256*256*256*32;
264
265
266 #define CMP_QPEL(d, dx, dy, x, y, size)\
267 CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
268
269 #define CMP(d, x, y, size)\
270 CMP_DIRECT(d, 0, 0, x, y, size, cmp)
271
272 #include "motion_est_template.c"
273 #undef RENAME
274 #undef CMP
275 #undef CMP_HPEL
276 #undef CMP_QPEL
277 #undef INIT
278 #undef CMP__DIRECT
279 243
280 static inline int get_penalty_factor(MpegEncContext *s, int type){ 244 static inline int get_penalty_factor(MpegEncContext *s, int type){
281 switch(type&0xFF){ 245 switch(type&0xFF){
282 default: 246 default:
283 case FF_CMP_SAD: 247 case FF_CMP_SAD:
299 void ff_init_me(MpegEncContext *s){ 263 void ff_init_me(MpegEncContext *s){
300 ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp); 264 ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
301 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); 265 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
302 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); 266 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
303 ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp); 267 ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp);
268
269 s->me.flags = get_flags(s, 0, s->avctx->me_cmp &FF_CMP_CHROMA);
270 s->me.sub_flags= get_flags(s, 0, s->avctx->me_sub_cmp&FF_CMP_CHROMA);
271 s->me.mb_flags = get_flags(s, 0, s->avctx->mb_cmp &FF_CMP_CHROMA);
304 272
305 if(s->flags&CODEC_FLAG_QPEL){ 273 if(s->flags&CODEC_FLAG_QPEL){
306 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) 274 s->me.sub_motion_search= qpel_motion_search;
307 s->me.sub_motion_search= simple_chroma_qpel_motion_search;
308 else
309 s->me.sub_motion_search= simple_qpel_motion_search;
310 }else{ 275 }else{
311 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) 276 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
312 s->me.sub_motion_search= simple_chroma_hpel_motion_search; 277 s->me.sub_motion_search= hpel_motion_search;
313 else if( s->avctx->me_sub_cmp == FF_CMP_SAD 278 else if( s->avctx->me_sub_cmp == FF_CMP_SAD
314 && s->avctx-> me_cmp == FF_CMP_SAD 279 && s->avctx-> me_cmp == FF_CMP_SAD
315 && s->avctx-> mb_cmp == FF_CMP_SAD) 280 && s->avctx-> mb_cmp == FF_CMP_SAD)
316 s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles 281 s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
317 else 282 else
318 s->me.sub_motion_search= simple_hpel_motion_search; 283 s->me.sub_motion_search= hpel_motion_search;
319 } 284 }
320 285 if(s->linesize){
321 if(s->avctx->me_cmp&FF_CMP_CHROMA){ 286 s->me.stride = s->linesize;
322 s->me.motion_search[0]= simple_chroma_epzs_motion_search; 287 s->me.uvstride= s->uvlinesize;
323 s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
324 s->me.motion_search[4]= simple_chroma_epzs_motion_search2;
325 }else{ 288 }else{
326 s->me.motion_search[0]= simple_epzs_motion_search; 289 s->me.stride = 16*s->mb_width + 32;
327 s->me.motion_search[1]= simple_epzs_motion_search4; 290 s->me.uvstride= 8*s->mb_width + 16;
328 s->me.motion_search[4]= simple_epzs_motion_search2;
329 }
330
331 if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){
332 s->me.pre_motion_search= simple_chroma_epzs_motion_search;
333 }else{
334 s->me.pre_motion_search= simple_epzs_motion_search;
335 }
336
337 if(s->flags&CODEC_FLAG_QPEL){
338 if(s->avctx->mb_cmp&FF_CMP_CHROMA)
339 s->me.get_mb_score= simple_chroma_qpel_get_mb_score;
340 else
341 s->me.get_mb_score= simple_qpel_get_mb_score;
342 }else{
343 if(s->avctx->mb_cmp&FF_CMP_CHROMA)
344 s->me.get_mb_score= simple_chroma_hpel_get_mb_score;
345 else
346 s->me.get_mb_score= simple_hpel_get_mb_score;
347 } 291 }
348 } 292 }
349 293
350 #if 0 294 #if 0
351 static int pix_dev(uint8_t * pix, int line_size, int mean) 295 static int pix_dev(uint8_t * pix, int line_size, int mean)
609 COPY3_IF_LT(dminh, d, dx, x, dy, y)\ 553 COPY3_IF_LT(dminh, d, dx, x, dy, y)\
610 } 554 }
611 555
612 static inline int sad_hpel_motion_search(MpegEncContext * s, 556 static inline int sad_hpel_motion_search(MpegEncContext * s,
613 int *mx_ptr, int *my_ptr, int dmin, 557 int *mx_ptr, int *my_ptr, int dmin,
614 int pred_x, int pred_y, uint8_t *src_data[3], 558 int src_index, int ref_index,
615 uint8_t *ref_data[6], int stride, int uvstride, 559 int size, int h)
616 int size, int h, uint8_t * const mv_penalty) 560 {
617 {
618 uint32_t *score_map= s->me.score_map;
619 const int penalty_factor= s->me.sub_penalty_factor; 561 const int penalty_factor= s->me.sub_penalty_factor;
620 int mx, my, dminh; 562 int mx, my, dminh;
621 uint8_t *pix, *ptr; 563 uint8_t *pix, *ptr;
622 const int xmin= s->me.xmin; 564 int stride= s->me.stride;
623 const int ymin= s->me.ymin; 565 const int flags= s->me.sub_flags;
624 const int xmax= s->me.xmax; 566 LOAD_COMMON
625 const int ymax= s->me.ymax; 567
568 assert(flags == 0);
626 569
627 if(s->me.skip){ 570 if(s->me.skip){
628 // printf("S"); 571 // printf("S");
629 *mx_ptr = 0; 572 *mx_ptr = 0;
630 *my_ptr = 0; 573 *my_ptr = 0;
631 return dmin; 574 return dmin;
632 } 575 }
633 // printf("N"); 576 // printf("N");
634 577
635 pix = src_data[0]; 578 pix = s->me.src[src_index][0];
636 579
637 mx = *mx_ptr; 580 mx = *mx_ptr;
638 my = *my_ptr; 581 my = *my_ptr;
639 ptr = ref_data[0] + (my * stride) + mx; 582 ptr = s->me.ref[ref_index][0] + (my * stride) + mx;
640 583
641 dminh = dmin; 584 dminh = dmin;
642 585
643 if (mx > xmin && mx < xmax && 586 if (mx > xmin && mx < xmax &&
644 my > ymin && my < ymax) { 587 my > ymin && my < ymax) {
763 } 706 }
764 } 707 }
765 708
766 static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) 709 static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
767 { 710 {
711 MotionEstContext * const c= &s->me;
768 const int size= 1; 712 const int size= 1;
769 const int h=8; 713 const int h=8;
770 int block; 714 int block;
771 int P[10][2]; 715 int P[10][2];
772 int dmin_sum=0, mx4_sum=0, my4_sum=0; 716 int dmin_sum=0, mx4_sum=0, my4_sum=0;
773 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
774 int same=1; 717 int same=1;
775 const int stride= s->linesize; 718 const int stride= s->linesize;
776 const int uvstride= s->uvlinesize; 719 const int uvstride= s->uvlinesize;
777 720 uint8_t *mv_penalty= s->me.current_mv_penalty;
721
722 c->ref[1][0] = c->ref[0][0] + 8;
723 c->ref[2][0] = c->ref[0][0] + 8*stride;
724 c->ref[3][0] = c->ref[2][0] + 8;
725 c->src[1][0] = c->src[0][0] + 8;
726 c->src[2][0] = c->src[0][0] + 8*stride;
727 c->src[3][0] = c->src[2][0] + 8;
728 init_mc(s, size, c->flags);
729
778 for(block=0; block<4; block++){ 730 for(block=0; block<4; block++){
779 int mx4, my4; 731 int mx4, my4;
780 int pred_x4, pred_y4; 732 int pred_x4, pred_y4;
781 int dmin4; 733 int dmin4;
782 static const int off[4]= {2, 1, 1, -1}; 734 static const int off[4]= {2, 1, 1, -1};
783 const int mot_stride = s->b8_stride; 735 const int mot_stride = s->b8_stride;
784 const int mot_xy = s->block_index[block]; 736 const int mot_xy = s->block_index[block];
785 const int block_x= (block&1);
786 const int block_y= (block>>1);
787 uint8_t *src_data[3]= {
788 s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
789 s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
790 s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
791 };
792 uint8_t *ref_data[3]= {
793 s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
794 s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
795 s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
796 };
797 737
798 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; 738 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
799 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; 739 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
800 740
801 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift); 741 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
802 742
803 /* special case for first line */ 743 /* special case for first line */
804 if (s->first_slice_line && block<2) { 744 if (s->first_slice_line && block<2) {
805 pred_x4= P_LEFT[0]; 745 s->me.pred_x= pred_x4= P_LEFT[0];
806 pred_y4= P_LEFT[1]; 746 s->me.pred_y= pred_y4= P_LEFT[1];
807 } else { 747 } else {
808 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; 748 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
809 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; 749 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
810 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0]; 750 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
811 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1]; 751 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
815 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift); 755 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
816 756
817 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 757 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
818 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 758 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
819 759
820 // if(s->out_format == FMT_H263){ 760 s->me.pred_x= pred_x4 = P_MEDIAN[0];
821 pred_x4 = P_MEDIAN[0]; 761 s->me.pred_y= pred_y4 = P_MEDIAN[1];
822 pred_y4 = P_MEDIAN[1];
823 #if 0
824 }else { /* mpeg1 at least */
825 pred_x4= P_LEFT[0];
826 pred_y4= P_LEFT[1];
827 }
828 #endif
829 } 762 }
830 P_MV1[0]= mx; 763 P_MV1[0]= mx;
831 P_MV1[1]= my; 764 P_MV1[1]= my;
832 765
833 dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4, 766 dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
834 src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); 767
835 768 dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);
836 dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4,
837 pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
838 769
839 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0] 770 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
840 && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE){
841 int dxy; 771 int dxy;
842 const int offset= ((block&1) + (block>>1)*stride)*8; 772 const int offset= ((block&1) + (block>>1)*stride)*8;
843 uint8_t *dest_y = s->me.scratchpad + offset; 773 uint8_t *dest_y = s->me.scratchpad + offset;
844 if(s->quarter_sample){ 774 if(s->quarter_sample){
845 uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride; 775 uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
846 dxy = ((my4 & 3) << 2) | (mx4 & 3); 776 dxy = ((my4 & 3) << 2) | (mx4 & 3);
847 777
848 if(s->no_rounding) 778 if(s->no_rounding)
849 s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride); 779 s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride);
850 else 780 else
851 s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); 781 s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride);
852 }else{ 782 }else{
853 uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride; 783 uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
854 dxy = ((my4 & 1) << 1) | (mx4 & 1); 784 dxy = ((my4 & 1) << 1) | (mx4 & 1);
855 785
856 if(s->no_rounding) 786 if(s->no_rounding)
857 s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h); 787 s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h);
858 else 788 else
903 } 833 }
904 834
905 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8); 835 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8);
906 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8); 836 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8);
907 } 837 }
838
839 s->me.pred_x= mx;
840 s->me.pred_y= my;
908 841
909 switch(s->avctx->mb_cmp&0xFF){ 842 switch(s->avctx->mb_cmp&0xFF){
910 /*case FF_CMP_SSE: 843 /*case FF_CMP_SSE:
911 return dmin_sum+ 32*s->qscale*s->qscale;*/ 844 return dmin_sum+ 32*s->qscale*s->qscale;*/
912 case FF_CMP_RD: 845 case FF_CMP_RD:
914 default: 847 default:
915 return dmin_sum+ 11*s->me.mb_penalty_factor; 848 return dmin_sum+ 11*s->me.mb_penalty_factor;
916 } 849 }
917 } 850 }
918 851
919 static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3], 852 static int interlaced_search(MpegEncContext *s, int ref_index,
920 int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my) 853 int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my)
921 { 854 {
855 MotionEstContext * const c= &s->me;
922 const int size=0; 856 const int size=0;
923 const int h=8; 857 const int h=8;
924 int block; 858 int block;
925 int P[10][2]; 859 int P[10][2];
926 uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; 860 uint8_t * const mv_penalty= c->current_mv_penalty;
927 int same=1; 861 int same=1;
928 const int stride= 2*s->linesize; 862 const int stride= 2*s->linesize;
929 const int uvstride= 2*s->uvlinesize; 863 const int uvstride= 2*s->uvlinesize;
930 int dmin_sum= 0; 864 int dmin_sum= 0;
931 const int mot_stride= s->mb_stride; 865 const int mot_stride= s->mb_stride;
932 const int xy= s->mb_x + s->mb_y*mot_stride; 866 const int xy= s->mb_x + s->mb_y*mot_stride;
933 867
934 s->me.ymin>>=1; 868 c->ymin>>=1;
935 s->me.ymax>>=1; 869 c->ymax>>=1;
870 c->stride<<=1;
871 c->uvstride<<=1;
872 c->ref[1+ref_index][0] = c->ref[0+ref_index][0] + s->linesize;
873 c->src[1][0] = c->src[0][0] + s->linesize;
874 if(c->flags & FLAG_CHROMA){
875 c->ref[1+ref_index][1] = c->ref[0+ref_index][1] + s->uvlinesize;
876 c->ref[1+ref_index][2] = c->ref[0+ref_index][2] + s->uvlinesize;
877 c->src[1][1] = c->src[0][1] + s->uvlinesize;
878 c->src[1][2] = c->src[0][2] + s->uvlinesize;
879 }
880 init_mc(s, size, c->flags);
936 881
937 for(block=0; block<2; block++){ 882 for(block=0; block<2; block++){
938 int field_select; 883 int field_select;
939 int best_dmin= INT_MAX; 884 int best_dmin= INT_MAX;
940 int best_field= -1; 885 int best_field= -1;
941 886
942 uint8_t *src_data[3]= {
943 frame_src_data[0] + s-> linesize*block,
944 frame_src_data[1] + s->uvlinesize*block,
945 frame_src_data[2] + s->uvlinesize*block
946 };
947
948 for(field_select=0; field_select<2; field_select++){ 887 for(field_select=0; field_select<2; field_select++){
949 int dmin, mx_i, my_i, pred_x, pred_y; 888 int dmin, mx_i, my_i;
950 uint8_t *ref_data[3]= {
951 frame_ref_data[0] + s-> linesize*field_select,
952 frame_ref_data[1] + s->uvlinesize*field_select,
953 frame_ref_data[2] + s->uvlinesize*field_select
954 };
955 int16_t (*mv_table)[2]= mv_tables[block][field_select]; 889 int16_t (*mv_table)[2]= mv_tables[block][field_select];
956 890
957 P_LEFT[0] = mv_table[xy - 1][0]; 891 P_LEFT[0] = mv_table[xy - 1][0];
958 P_LEFT[1] = mv_table[xy - 1][1]; 892 P_LEFT[1] = mv_table[xy - 1][1];
959 if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1); 893 if(P_LEFT[0] > (c->xmax<<1)) P_LEFT[0] = (c->xmax<<1);
960 894
961 pred_x= P_LEFT[0]; 895 s->me.pred_x= P_LEFT[0];
962 pred_y= P_LEFT[1]; 896 s->me.pred_y= P_LEFT[1];
963 897
964 if(!s->first_slice_line){ 898 if(!s->first_slice_line){
965 P_TOP[0] = mv_table[xy - mot_stride][0]; 899 P_TOP[0] = mv_table[xy - mot_stride][0];
966 P_TOP[1] = mv_table[xy - mot_stride][1]; 900 P_TOP[1] = mv_table[xy - mot_stride][1];
967 P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0]; 901 P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
968 P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1]; 902 P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
969 if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1); 903 if(P_TOP[1] > (c->ymax<<1)) P_TOP[1] = (c->ymax<<1);
970 if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1); 904 if(P_TOPRIGHT[0] < (c->xmin<<1)) P_TOPRIGHT[0]= (c->xmin<<1);
971 if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1); 905 if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
972 if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1); 906 if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);
973 907
974 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 908 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
975 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 909 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
976 } 910 }
977 P_MV1[0]= mx; //FIXME not correct if block != field_select 911 P_MV1[0]= mx; //FIXME not correct if block != field_select
978 P_MV1[1]= my / 2; 912 P_MV1[1]= my / 2;
979 913
980 dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y, 914 dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);
981 src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty); 915
982 916 dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);
983 dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin,
984 pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
985 917
986 mv_table[xy][0]= mx_i; 918 mv_table[xy][0]= mx_i;
987 mv_table[xy][1]= my_i; 919 mv_table[xy][1]= my_i;
988 920
989 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0] 921 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
990 && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE){
991 int dxy; 922 int dxy;
992 923
993 //FIXME chroma ME 924 //FIXME chroma ME
994 uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride; 925 uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
995 dxy = ((my_i & 1) << 1) | (mx_i & 1); 926 dxy = ((my_i & 1) << 1) | (mx_i & 1);
996 927
997 if(s->no_rounding){ 928 if(s->no_rounding){
998 s->dsp.put_no_rnd_pixels_tab[size][dxy](s->me.scratchpad, ref , stride, h); 929 s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref , stride, h);
999 }else{ 930 }else{
1000 s->dsp.put_pixels_tab [size][dxy](s->me.scratchpad, ref , stride, h); 931 s->dsp.put_pixels_tab [size][dxy](c->scratchpad, ref , stride, h);
1001 } 932 }
1002 dmin= s->dsp.mb_cmp[size](s, src_data[0], s->me.scratchpad, stride, h); 933 dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
1003 dmin+= (mv_penalty[mx_i-pred_x] + mv_penalty[my_i-pred_y] + 1)*s->me.mb_penalty_factor; 934 dmin+= (mv_penalty[mx_i-s->me.pred_x] + mv_penalty[my_i-s->me.pred_y] + 1)*c->mb_penalty_factor;
1004 }else 935 }else
1005 dmin+= s->me.mb_penalty_factor; //field_select bits 936 dmin+= c->mb_penalty_factor; //field_select bits
1006 937
1007 dmin += field_select != block; //slightly prefer same field 938 dmin += field_select != block; //slightly prefer same field
1008 939
1009 if(dmin < best_dmin){ 940 if(dmin < best_dmin){
1010 best_dmin= dmin; 941 best_dmin= dmin;
1022 953
1023 field_select_tables[block][xy]= best_field; 954 field_select_tables[block][xy]= best_field;
1024 dmin_sum += best_dmin; 955 dmin_sum += best_dmin;
1025 } 956 }
1026 957
1027 s->me.ymin<<=1; 958 c->ymin<<=1;
1028 s->me.ymax<<=1; 959 c->ymax<<=1;
960 c->stride>>=1;
961 c->uvstride>>=1;
1029 962
1030 if(same) 963 if(same)
1031 return INT_MAX; 964 return INT_MAX;
1032 965
1033 switch(s->avctx->mb_cmp&0xFF){ 966 switch(s->avctx->mb_cmp&0xFF){
1034 /*case FF_CMP_SSE: 967 /*case FF_CMP_SSE:
1035 return dmin_sum+ 32*s->qscale*s->qscale;*/ 968 return dmin_sum+ 32*s->qscale*s->qscale;*/
1036 case FF_CMP_RD: 969 case FF_CMP_RD:
1037 return dmin_sum; 970 return dmin_sum;
1038 default: 971 default:
1039 return dmin_sum+ 11*s->me.mb_penalty_factor; 972 return dmin_sum+ 11*c->mb_penalty_factor;
1040 } 973 }
1041 } 974 }
1042 975
1043 void ff_estimate_p_frame_motion(MpegEncContext * s, 976 void ff_estimate_p_frame_motion(MpegEncContext * s,
1044 int mb_x, int mb_y) 977 int mb_x, int mb_y)
1045 { 978 {
979 MotionEstContext * const c= &s->me;
1046 uint8_t *pix, *ppix; 980 uint8_t *pix, *ppix;
1047 int sum, varc, vard, mx, my, dmin, xx, yy; 981 int sum, varc, vard, mx, my, dmin, xx, yy;
1048 int pred_x=0, pred_y=0;
1049 int P[10][2]; 982 int P[10][2];
1050 const int shift= 1+s->quarter_sample; 983 const int shift= 1+s->quarter_sample;
1051 int mb_type=0; 984 int mb_type=0;
1052 uint8_t *ref_picture= s->last_picture.data[0];
1053 Picture * const pic= &s->current_picture; 985 Picture * const pic= &s->current_picture;
1054 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; 986
1055 const int stride= s->linesize; 987 init_ref(s, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);
1056 const int uvstride= s->uvlinesize; 988 init_mc(s, 0, c->flags);
1057 uint8_t *src_data[3]= {
1058 s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
1059 s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1060 s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1061 };
1062 uint8_t *ref_data[3]= {
1063 s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
1064 s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1065 s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1066 };
1067 989
1068 assert(s->quarter_sample==0 || s->quarter_sample==1); 990 assert(s->quarter_sample==0 || s->quarter_sample==1);
991 assert(s->linesize == s->me.stride);
992 assert(s->uvlinesize == s->me.uvstride);
1069 993
1070 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); 994 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
1071 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); 995 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1072 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); 996 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
997 s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1073 998
1074 get_limits(s, 16*mb_x, 16*mb_y); 999 get_limits(s, 16*mb_x, 16*mb_y);
1075 s->me.skip=0; 1000 s->me.skip=0;
1076 1001
1077 switch(s->me_method) { 1002 switch(s->me_method) {
1121 1046
1122 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1047 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1123 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1048 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1124 1049
1125 if(s->out_format == FMT_H263){ 1050 if(s->out_format == FMT_H263){
1126 pred_x = P_MEDIAN[0]; 1051 c->pred_x = P_MEDIAN[0];
1127 pred_y = P_MEDIAN[1]; 1052 c->pred_y = P_MEDIAN[1];
1128 }else { /* mpeg1 at least */ 1053 }else { /* mpeg1 at least */
1129 pred_x= P_LEFT[0]; 1054 c->pred_x= P_LEFT[0];
1130 pred_y= P_LEFT[1]; 1055 c->pred_y= P_LEFT[1];
1131 } 1056 }
1132 }else{ 1057 }else{
1133 pred_x= P_LEFT[0]; 1058 c->pred_x= P_LEFT[0];
1134 pred_y= P_LEFT[1]; 1059 c->pred_y= P_LEFT[1];
1135 } 1060 }
1136 1061
1137 } 1062 }
1138 dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, 1063 dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift);
1139 src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); 1064
1140
1141 break; 1065 break;
1142 } 1066 }
1143 1067
1144 /* intra / predictive decision */ 1068 /* intra / predictive decision */
1145 xx = mb_x * 16; 1069 xx = mb_x * 16;
1146 yy = mb_y * 16; 1070 yy = mb_y * 16;
1147 1071
1148 pix = src_data[0]; 1072 pix = c->src[0][0];
1149 /* At this point (mx,my) are full-pell and the relative displacement */ 1073 /* At this point (mx,my) are full-pell and the relative displacement */
1150 ppix = ref_data[0] + (my * s->linesize) + mx; 1074 ppix = c->ref[0][0] + (my * s->linesize) + mx;
1151 1075
1152 sum = s->dsp.pix_sum(pix, s->linesize); 1076 sum = s->dsp.pix_sum(pix, s->linesize);
1153 1077
1154 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; 1078 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1155 vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8; 1079 vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8;
1175 1099
1176 if (vard*2 + 200 > varc) 1100 if (vard*2 + 200 > varc)
1177 mb_type|= CANDIDATE_MB_TYPE_INTRA; 1101 mb_type|= CANDIDATE_MB_TYPE_INTRA;
1178 if (varc*2 + 200 > vard){ 1102 if (varc*2 + 200 > vard){
1179 mb_type|= CANDIDATE_MB_TYPE_INTER; 1103 mb_type|= CANDIDATE_MB_TYPE_INTER;
1180 s->me.sub_motion_search(s, &mx, &my, dmin, 1104 s->me.sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
1181 pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1182 if(s->flags&CODEC_FLAG_MV0) 1105 if(s->flags&CODEC_FLAG_MV0)
1183 if(mx || my) 1106 if(mx || my)
1184 mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference 1107 mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference
1185 }else{ 1108 }else{
1186 mx <<=shift; 1109 mx <<=shift;
1194 set_p_mv_tables(s, mx, my, 0); 1117 set_p_mv_tables(s, mx, my, 0);
1195 }else 1118 }else
1196 set_p_mv_tables(s, mx, my, 1); 1119 set_p_mv_tables(s, mx, my, 1);
1197 if((s->flags&CODEC_FLAG_INTERLACED_ME) 1120 if((s->flags&CODEC_FLAG_INTERLACED_ME)
1198 && !s->me.skip){ //FIXME varc/d checks 1121 && !s->me.skip){ //FIXME varc/d checks
1199 if(interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my) < INT_MAX) 1122 if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my) < INT_MAX)
1200 mb_type |= CANDIDATE_MB_TYPE_INTER_I; 1123 mb_type |= CANDIDATE_MB_TYPE_INTER_I;
1201 } 1124 }
1202 }else{ 1125 }else{
1203 int intra_score, i; 1126 int intra_score, i;
1204 mb_type= CANDIDATE_MB_TYPE_INTER; 1127 mb_type= CANDIDATE_MB_TYPE_INTER;
1205 1128
1206 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, 1129 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
1207 pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1208 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1130 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1209 dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty); 1131 dmin= get_mb_score(s, mx, my, 0, 0);
1210 1132
1211 if((s->flags&CODEC_FLAG_4MV) 1133 if((s->flags&CODEC_FLAG_4MV)
1212 && !s->me.skip && varc>50 && vard>10){ 1134 && !s->me.skip && varc>50 && vard>10){
1213 int dmin4= h263_mv4_search(s, mx, my, shift); 1135 int dmin4= h263_mv4_search(s, mx, my, shift);
1214 if(dmin4 < dmin){ 1136 if(dmin4 < dmin){
1216 dmin=dmin4; 1138 dmin=dmin4;
1217 } 1139 }
1218 } 1140 }
1219 if((s->flags&CODEC_FLAG_INTERLACED_ME) 1141 if((s->flags&CODEC_FLAG_INTERLACED_ME)
1220 && !s->me.skip){ //FIXME varc/d checks 1142 && !s->me.skip){ //FIXME varc/d checks
1221 int dmin_i= interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my); 1143 int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my);
1222 if(dmin_i < dmin){ 1144 if(dmin_i < dmin){
1223 mb_type = CANDIDATE_MB_TYPE_INTER_I; 1145 mb_type = CANDIDATE_MB_TYPE_INTER_I;
1224 dmin= dmin_i; 1146 dmin= dmin_i;
1225 } 1147 }
1226 } 1148 }
1287 } 1209 }
1288 1210
1289 int ff_pre_estimate_p_frame_motion(MpegEncContext * s, 1211 int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
1290 int mb_x, int mb_y) 1212 int mb_x, int mb_y)
1291 { 1213 {
1214 MotionEstContext * const c= &s->me;
1292 int mx, my, dmin; 1215 int mx, my, dmin;
1293 int pred_x=0, pred_y=0;
1294 int P[10][2]; 1216 int P[10][2];
1295 const int shift= 1+s->quarter_sample; 1217 const int shift= 1+s->quarter_sample;
1296 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1297 const int xy= mb_x + mb_y*s->mb_stride; 1218 const int xy= mb_x + mb_y*s->mb_stride;
1298 const int stride= s->linesize; 1219 init_ref(s, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);
1299 const int uvstride= s->uvlinesize; 1220 init_mc(s, 0, c->flags);
1300 uint8_t *src_data[3]= {
1301 s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
1302 s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1303 s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1304 };
1305 uint8_t *ref_data[3]= {
1306 s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
1307 s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1308 s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1309 };
1310 1221
1311 assert(s->quarter_sample==0 || s->quarter_sample==1); 1222 assert(s->quarter_sample==0 || s->quarter_sample==1);
1312 1223
1313 s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp); 1224 s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp);
1225 s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1314 1226
1315 get_limits(s, 16*mb_x, 16*mb_y); 1227 get_limits(s, 16*mb_x, 16*mb_y);
1316 s->me.skip=0; 1228 s->me.skip=0;
1317 1229
1318 P_LEFT[0] = s->p_mv_table[xy + 1][0]; 1230 P_LEFT[0] = s->p_mv_table[xy + 1][0];
1320 1232
1321 if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift); 1233 if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift);
1322 1234
1323 /* special case for first line */ 1235 /* special case for first line */
1324 if (s->first_slice_line) { 1236 if (s->first_slice_line) {
1325 pred_x= P_LEFT[0]; 1237 c->pred_x= P_LEFT[0];
1326 pred_y= P_LEFT[1]; 1238 c->pred_y= P_LEFT[1];
1327 P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]= 1239 P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
1328 P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME 1240 P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
1329 } else { 1241 } else {
1330 P_TOP[0] = s->p_mv_table[xy + s->mb_stride ][0]; 1242 P_TOP[0] = s->p_mv_table[xy + s->mb_stride ][0];
1331 P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1]; 1243 P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1];
1336 if(P_TOPRIGHT[1] < (s->me.ymin<<shift)) P_TOPRIGHT[1]= (s->me.ymin<<shift); 1248 if(P_TOPRIGHT[1] < (s->me.ymin<<shift)) P_TOPRIGHT[1]= (s->me.ymin<<shift);
1337 1249
1338 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1250 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1339 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1251 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1340 1252
1341 pred_x = P_MEDIAN[0]; 1253 c->pred_x = P_MEDIAN[0];
1342 pred_y = P_MEDIAN[1]; 1254 c->pred_y = P_MEDIAN[1];
1343 } 1255 }
1344 dmin = s->me.pre_motion_search(s, &mx, &my, P, pred_x, pred_y, 1256
1345 src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); 1257 dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift);
1346 1258
1347 s->p_mv_table[xy][0] = mx<<shift; 1259 s->p_mv_table[xy][0] = mx<<shift;
1348 s->p_mv_table[xy][1] = my<<shift; 1260 s->p_mv_table[xy][1] = my<<shift;
1349 1261
1350 return dmin; 1262 return dmin;
1351 } 1263 }
1352 1264
1353 static int ff_estimate_motion_b(MpegEncContext * s, 1265 static int ff_estimate_motion_b(MpegEncContext * s,
1354 int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *src_data[3], 1266 int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
1355 uint8_t *ref_data[3], int stride, int uvstride, int f_code)
1356 { 1267 {
1357 int mx, my, dmin; 1268 int mx, my, dmin;
1358 int pred_x=0, pred_y=0;
1359 int P[10][2]; 1269 int P[10][2];
1360 const int shift= 1+s->quarter_sample; 1270 const int shift= 1+s->quarter_sample;
1361 const int mot_stride = s->mb_stride; 1271 const int mot_stride = s->mb_stride;
1362 const int mot_xy = mb_y*mot_stride + mb_x; 1272 const int mot_xy = mb_y*mot_stride + mb_x;
1363 uint8_t * const ref_picture= ref_data[0] - 16*s->mb_x - 16*s->mb_y*s->linesize; //FIXME ugly
1364 uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; 1273 uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
1365 int mv_scale; 1274 int mv_scale;
1366 1275
1367 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); 1276 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
1368 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); 1277 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1369 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); 1278 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
1279 s->me.current_mv_penalty= mv_penalty;
1370 1280
1371 get_limits(s, 16*mb_x, 16*mb_y); 1281 get_limits(s, 16*mb_x, 16*mb_y);
1372 1282
1373 switch(s->me_method) { 1283 switch(s->me_method) {
1374 case ME_ZERO: 1284 case ME_ZERO:
1414 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift); 1324 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
1415 1325
1416 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1326 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1417 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1327 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1418 } 1328 }
1419 pred_x= P_LEFT[0]; 1329 s->me.pred_x= P_LEFT[0];
1420 pred_y= P_LEFT[1]; 1330 s->me.pred_y= P_LEFT[1];
1421 } 1331 }
1422 1332
1423 if(mv_table == s->b_forw_mv_table){ 1333 if(mv_table == s->b_forw_mv_table){
1424 mv_scale= (s->pb_time<<16) / (s->pp_time<<shift); 1334 mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
1425 }else{ 1335 }else{
1426 mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift); 1336 mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
1427 } 1337 }
1428 1338
1429 dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, 1339 dmin = epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale);
1430 src_data, ref_data, stride, uvstride, s->p_mv_table, mv_scale, mv_penalty);
1431 1340
1432 break; 1341 break;
1433 } 1342 }
1434 1343
1435 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, 1344 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);
1436 pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1437 1345
1438 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1346 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1439 dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty); 1347 dmin= get_mb_score(s, mx, my, 0, ref_index);
1440 1348
1441 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); 1349 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1442 // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; 1350 // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1443 mv_table[mot_xy][0]= mx; 1351 mv_table[mot_xy][0]= mx;
1444 mv_table[mot_xy][1]= my; 1352 mv_table[mot_xy][1]= my;
1445 1353
1446 return dmin; 1354 return dmin;
1447 } 1355 }
1448 1356
1449 static inline int check_bidir_mv(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], 1357 static inline int check_bidir_mv(MpegEncContext * s,
1450 int stride, int uvstride,
1451 int motion_fx, int motion_fy, 1358 int motion_fx, int motion_fy,
1452 int motion_bx, int motion_by, 1359 int motion_bx, int motion_by,
1453 int pred_fx, int pred_fy, 1360 int pred_fx, int pred_fy,
1454 int pred_bx, int pred_by, 1361 int pred_bx, int pred_by,
1455 int size, int h) 1362 int size, int h)
1456 { 1363 {
1457 //FIXME optimize? 1364 //FIXME optimize?
1458 //FIXME move into template?
1459 //FIXME better f_code prediction (max mv & distance) 1365 //FIXME better f_code prediction (max mv & distance)
1460 //FIXME pointers 1366 //FIXME pointers
1367 MotionEstContext * const c= &s->me;
1461 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame 1368 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1369 int stride= s->me.stride;
1370 int uvstride= s->me.uvstride;
1462 uint8_t *dest_y = s->me.scratchpad; 1371 uint8_t *dest_y = s->me.scratchpad;
1463 uint8_t *ptr; 1372 uint8_t *ptr;
1464 int dxy; 1373 int dxy;
1465 int src_x, src_y; 1374 int src_x, src_y;
1466 int fbmin; 1375 int fbmin;
1376 uint8_t **src_data= c->src[0];
1377 uint8_t **ref_data= c->ref[0];
1378 uint8_t **ref2_data= c->ref[2];
1467 1379
1468 if(s->quarter_sample){ 1380 if(s->quarter_sample){
1469 dxy = ((motion_fy & 3) << 2) | (motion_fx & 3); 1381 dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
1470 src_x = motion_fx >> 2; 1382 src_x = motion_fx >> 2;
1471 src_y = motion_fy >> 2; 1383 src_y = motion_fy >> 2;
1475 1387
1476 dxy = ((motion_by & 3) << 2) | (motion_bx & 3); 1388 dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
1477 src_x = motion_bx >> 2; 1389 src_x = motion_bx >> 2;
1478 src_y = motion_by >> 2; 1390 src_y = motion_by >> 2;
1479 1391
1480 ptr = ref_data[3] + (src_y * stride) + src_x; 1392 ptr = ref2_data[0] + (src_y * stride) + src_x;
1481 s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride); 1393 s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride);
1482 }else{ 1394 }else{
1483 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); 1395 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1484 src_x = motion_fx >> 1; 1396 src_x = motion_fx >> 1;
1485 src_y = motion_fy >> 1; 1397 src_y = motion_fy >> 1;
1489 1401
1490 dxy = ((motion_by & 1) << 1) | (motion_bx & 1); 1402 dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1491 src_x = motion_bx >> 1; 1403 src_x = motion_bx >> 1;
1492 src_y = motion_by >> 1; 1404 src_y = motion_by >> 1;
1493 1405
1494 ptr = ref_data[3] + (src_y * stride) + src_x; 1406 ptr = ref2_data[0] + (src_y * stride) + src_x;
1495 s->dsp.avg_pixels_tab[size][dxy](dest_y , ptr , stride, h); 1407 s->dsp.avg_pixels_tab[size][dxy](dest_y , ptr , stride, h);
1496 } 1408 }
1497 1409
1498 fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor 1410 fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor
1499 +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor 1411 +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor
1505 1417
1506 return fbmin; 1418 return fbmin;
1507 } 1419 }
1508 1420
1509 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/ 1421 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1510 static inline int bidir_refine(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], 1422 static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1511 int stride, int uvstride,
1512 int mb_x, int mb_y)
1513 { 1423 {
1514 const int mot_stride = s->mb_stride; 1424 const int mot_stride = s->mb_stride;
1515 const int xy = mb_y *mot_stride + mb_x; 1425 const int xy = mb_y *mot_stride + mb_x;
1516 int fbmin; 1426 int fbmin;
1517 int pred_fx= s->b_bidir_forw_mv_table[xy-1][0]; 1427 int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
1523 int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0]; 1433 int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
1524 int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1]; 1434 int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1525 1435
1526 //FIXME do refinement and add flag 1436 //FIXME do refinement and add flag
1527 1437
1528 fbmin= check_bidir_mv(s, src_data, ref_data, stride, uvstride, 1438 fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1529 motion_fx, motion_fy,
1530 motion_bx, motion_by, 1439 motion_bx, motion_by,
1531 pred_fx, pred_fy, 1440 pred_fx, pred_fy,
1532 pred_bx, pred_by, 1441 pred_bx, pred_by,
1533 0, 16); 1442 0, 16);
1534 1443
1535 return fbmin; 1444 return fbmin;
1536 } 1445 }
1537 1446
1538 static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], 1447 static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
1539 int stride, int uvstride,
1540 int mb_x, int mb_y)
1541 { 1448 {
1542 int P[10][2]; 1449 int P[10][2];
1543 const int mot_stride = s->mb_stride; 1450 const int mot_stride = s->mb_stride;
1544 const int mot_xy = mb_y*mot_stride + mb_x; 1451 const int mot_xy = mb_y*mot_stride + mb_x;
1545 const int shift= 1+s->quarter_sample; 1452 const int shift= 1+s->quarter_sample;
1546 int dmin, i; 1453 int dmin, i;
1547 const int time_pp= s->pp_time; 1454 const int time_pp= s->pp_time;
1548 const int time_pb= s->pb_time; 1455 const int time_pb= s->pb_time;
1549 int mx, my, xmin, xmax, ymin, ymax; 1456 int mx, my, xmin, xmax, ymin, ymax;
1550 int16_t (*mv_table)[2]= s->b_direct_mv_table; 1457 int16_t (*mv_table)[2]= s->b_direct_mv_table;
1551 uint8_t * const mv_penalty= s->me.mv_penalty[1] + MAX_MV; 1458
1552 1459 s->me.current_mv_penalty= s->me.mv_penalty[1] + MAX_MV;
1553 ymin= xmin=(-32)>>shift; 1460 ymin= xmin=(-32)>>shift;
1554 ymax= xmax= 31>>shift; 1461 ymax= xmax= 31>>shift;
1555 1462
1556 if(IS_8X8(s->next_picture.mb_type[mot_xy])){ 1463 if(IS_8X8(s->next_picture.mb_type[mot_xy])){
1557 s->mv_type= MV_TYPE_8X8; 1464 s->mv_type= MV_TYPE_8X8;
1598 1505
1599 s->me.xmin= xmin; 1506 s->me.xmin= xmin;
1600 s->me.ymin= ymin; 1507 s->me.ymin= ymin;
1601 s->me.xmax= xmax; 1508 s->me.xmax= xmax;
1602 s->me.ymax= ymax; 1509 s->me.ymax= ymax;
1510 s->me.flags |= FLAG_DIRECT;
1511 s->me.sub_flags |= FLAG_DIRECT;
1512 s->me.pred_x=0;
1513 s->me.pred_y=0;
1603 1514
1604 P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift); 1515 P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
1605 P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift); 1516 P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
1606 1517
1607 /* special case for first line */ 1518 /* special case for first line */
1613 1524
1614 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1525 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1615 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1526 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1616 } 1527 }
1617 1528
1618 //FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy) 1529 dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift));
1619 if(s->flags&CODEC_FLAG_QPEL){ 1530 if(s->me.sub_flags&FLAG_QPEL)
1620 dmin = simple_direct_qpel_epzs_motion_search(s, &mx, &my, P, 0, 0, 1531 dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
1621 src_data, ref_data, stride, uvstride, mv_table, 1<<14, mv_penalty); 1532 else
1622 dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, 1533 dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
1623 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); 1534
1624 1535 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1625 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1536 dmin= get_mb_score(s, mx, my, 0, 0);
1626 dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
1627 }else{
1628 dmin = simple_direct_hpel_epzs_motion_search(s, &mx, &my, P, 0, 0,
1629 src_data, ref_data, stride, uvstride, mv_table, 1<<15, mv_penalty);
1630 dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin,
1631 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1632
1633 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1634 dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
1635 }
1636 1537
1637 get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed 1538 get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed
1638 1539
1639 s->b_direct_mv_table[mot_xy][0]= mx; 1540 s->b_direct_mv_table[mot_xy][0]= mx;
1640 s->b_direct_mv_table[mot_xy][1]= my; 1541 s->b_direct_mv_table[mot_xy][1]= my;
1542 s->me.flags &= ~FLAG_DIRECT;
1543 s->me.sub_flags &= ~FLAG_DIRECT;
1544
1641 return dmin; 1545 return dmin;
1642 } 1546 }
1643 1547
1644 void ff_estimate_b_frame_motion(MpegEncContext * s, 1548 void ff_estimate_b_frame_motion(MpegEncContext * s,
1645 int mb_x, int mb_y) 1549 int mb_x, int mb_y)
1646 { 1550 {
1647 const int penalty_factor= s->me.mb_penalty_factor; 1551 const int penalty_factor= s->me.mb_penalty_factor;
1648 int fmin, bmin, dmin, fbmin, bimin, fimin; 1552 int fmin, bmin, dmin, fbmin, bimin, fimin;
1649 int type=0; 1553 int type=0;
1650 const int stride= s->linesize; 1554 init_ref(s, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);
1651 const int uvstride= s->uvlinesize; 1555 init_mc(s, 0, s->me.flags);
1652 uint8_t *src_data[3]= {
1653 s->new_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1654 s->new_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1655 s->new_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
1656 };
1657 uint8_t *ref_data[6]= {
1658 s->last_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1659 s->last_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1660 s->last_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y),
1661 s->next_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1662 s->next_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1663 s->next_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
1664 };
1665 1556
1666 s->me.skip=0; 1557 s->me.skip=0;
1667 if (s->codec_id == CODEC_ID_MPEG4) 1558 if (s->codec_id == CODEC_ID_MPEG4)
1668 dmin= direct_search(s, src_data, ref_data, stride, uvstride, mb_x, mb_y); 1559 dmin= direct_search(s, mb_x, mb_y);
1669 else 1560 else
1670 dmin= INT_MAX; 1561 dmin= INT_MAX;
1671 //FIXME penalty stuff for non mpeg4 1562 //FIXME penalty stuff for non mpeg4
1672 s->me.skip=0; 1563 s->me.skip=0;
1673 fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, src_data, 1564 fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;
1674 ref_data, stride, uvstride, s->f_code) + 3*penalty_factor;
1675 1565
1676 s->me.skip=0; 1566 s->me.skip=0;
1677 bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, src_data, 1567 bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;
1678 ref_data+3, stride, uvstride, s->b_code) + 2*penalty_factor;
1679 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); 1568 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1680 1569
1681 s->me.skip=0; 1570 s->me.skip=0;
1682 fbmin= bidir_refine(s, src_data, ref_data, stride, uvstride, mb_x, mb_y) + penalty_factor; 1571 fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
1683 //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin); 1572 //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
1684 1573
1685 if(s->flags & CODEC_FLAG_INTERLACED_ME){ 1574 if(s->flags & CODEC_FLAG_INTERLACED_ME){
1686 const int xy = mb_y*s->mb_stride + mb_x; 1575 const int xy = mb_y*s->mb_stride + mb_x;
1687 1576
1688 //FIXME mb type penalty 1577 //FIXME mb type penalty
1689 s->me.skip=0; 1578 s->me.skip=0;
1690 fimin= interlaced_search(s, src_data, ref_data , 1579 s->me.current_mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1691 s->b_field_mv_table[0], s->b_field_select_table[0], s->f_code, 1580 fimin= interlaced_search(s, 0,
1581 s->b_field_mv_table[0], s->b_field_select_table[0],
1692 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); 1582 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1693 bimin= interlaced_search(s, src_data, ref_data+3, 1583 s->me.current_mv_penalty= s->me.mv_penalty[s->b_code] + MAX_MV;
1694 s->b_field_mv_table[1], s->b_field_select_table[1], s->b_code, 1584 bimin= interlaced_search(s, 2,
1585 s->b_field_mv_table[1], s->b_field_select_table[1],
1695 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]); 1586 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]);
1696 }else 1587 }else
1697 fimin= bimin= INT_MAX; 1588 fimin= bimin= INT_MAX;
1698 1589
1699 { 1590 {