comparison dsputil.c @ 2184:3378d0677903 libavcodec

4x4 SSE compare function; wavelet based compare functions; make epzs_motion_search() more flexible so it can be used for a wider range of block sizes; make get_penalty_factor() independent of MpegEncContext
author michael
date Sun, 22 Aug 2004 17:16:03 +0000
parents db8baace74d8
children 9ca8a88a8a70
comparison
equal deleted inserted replaced
2183:6d40885b03ad 2184:3378d0677903
217 for(;i<w; i++){ 217 for(;i<w; i++){
218 dst[i+0]= bswap_32(src[i+0]); 218 dst[i+0]= bswap_32(src[i+0]);
219 } 219 }
220 } 220 }
221 221
222 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
223 {
224 int s, i;
225 uint32_t *sq = squareTbl + 256;
226
227 s = 0;
228 for (i = 0; i < h; i++) {
229 s += sq[pix1[0] - pix2[0]];
230 s += sq[pix1[1] - pix2[1]];
231 s += sq[pix1[2] - pix2[2]];
232 s += sq[pix1[3] - pix2[3]];
233 pix1 += line_size;
234 pix2 += line_size;
235 }
236 return s;
237 }
238
222 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 239 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
223 { 240 {
224 int s, i; 241 int s, i;
225 uint32_t *sq = squareTbl + 256; 242 uint32_t *sq = squareTbl + 256;
226 243
266 283
267 pix1 += line_size; 284 pix1 += line_size;
268 pix2 += line_size; 285 pix2 += line_size;
269 } 286 }
270 return s; 287 return s;
288 }
289
290
/**
 * Wavelet-domain comparison of two pixel blocks.
 *
 * The per-pixel difference pix1 - pix2 is scaled by 16 and stored in a
 * scratch buffer with a row stride of 16 ints. The buffer is then transformed
 * in place by ff_spatial_dwt() and the absolute values of all w*h resulting
 * entries are summed.
 *
 * @param v         unused here
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size offset in bytes from one row to the next, for both blocks
 * @param w         block width; the wrappers in this file pass 8 or 16
 * @param h         block height in rows
 * @param type      forwarded to ff_spatial_dwt(); the w97_* wrappers pass 0,
 *                  the w53_* wrappers pass 1
 * @return the sum of absolute transformed values, divided by 4
 *
 * NOTE(review): the scratch buffer is 16x16, so w and h must not exceed 16;
 * this is not checked here.
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    /* 3 for 8-wide blocks, 4 otherwise; forwarded to ff_spatial_dwt() */
    const int dec_count= w==8 ? 3 : 4;
    int tmp[16*16];
#if 0
    /* Disabled: per-subband weights for the weighted sum further below. */
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            //8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            //16x16 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {//FIXME 5/3
            //8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            //16x16 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };
#endif

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            /* Multiply instead of "<<4": the difference may be negative and
             * left-shifting a negative int is undefined behaviour in C. */
            tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])*16;
            tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])*16;
            tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])*16;
            tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])*16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }
    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);

    s=0;
#if 0
    /* Disabled: weighted sum over the individual subbands. */
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int sx= (ori&1) ? 1<<level: 0;
            int stride= 16<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;
            int size= 1<<level;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += ABS(v);
                }
            }
        }
    }
#endif
    /* Unweighted sum of absolute values over the whole w x h area. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            s+= ABS(tmp[16*i+j+0]);
            s+= ABS(tmp[16*i+j+1]);
            s+= ABS(tmp[16*i+j+2]);
            s+= ABS(tmp[16*i+j+3]);
        }
    }
    /* A negative sum of absolute values would indicate overflow. */
    assert(s>=0);

    return s>>2;
}
370
/**
 * Compare function with the common (v, pix1, pix2, line_size, h) signature:
 * runs w_c() on an 8-pixel-wide block with transform type 1.
 */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int block_width = 8;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
374
/**
 * Compare function with the common (v, pix1, pix2, line_size, h) signature:
 * runs w_c() on an 8-pixel-wide block with transform type 0.
 */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int block_width = 8;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
378
/**
 * Compare function with the common (v, pix1, pix2, line_size, h) signature:
 * runs w_c() on a 16-pixel-wide block with transform type 1.
 */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int block_width = 16;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
382
/**
 * Compare function with the common (v, pix1, pix2, line_size, h) signature:
 * runs w_c() on a 16-pixel-wide block with transform type 0.
 */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int block_width = 16;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
272 386
273 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) 387 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
274 { 388 {
275 int i; 389 int i;
2731 cmp[i]= zero_cmp; 2845 cmp[i]= zero_cmp;
2732 break; 2846 break;
2733 case FF_CMP_NSSE: 2847 case FF_CMP_NSSE:
2734 cmp[i]= c->nsse[i]; 2848 cmp[i]= c->nsse[i];
2735 break; 2849 break;
2850 case FF_CMP_W53:
2851 cmp[i]= c->w53[i];
2852 break;
2853 case FF_CMP_W97:
2854 cmp[i]= c->w97[i];
2855 break;
2736 default: 2856 default:
2737 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); 2857 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
2738 } 2858 }
2739 } 2859 }
2740 } 2860 }
3357 SET_CMP_FUNC(dct_sad) 3477 SET_CMP_FUNC(dct_sad)
3358 c->sad[0]= pix_abs16_c; 3478 c->sad[0]= pix_abs16_c;
3359 c->sad[1]= pix_abs8_c; 3479 c->sad[1]= pix_abs8_c;
3360 c->sse[0]= sse16_c; 3480 c->sse[0]= sse16_c;
3361 c->sse[1]= sse8_c; 3481 c->sse[1]= sse8_c;
3482 c->sse[2]= sse4_c;
3362 SET_CMP_FUNC(quant_psnr) 3483 SET_CMP_FUNC(quant_psnr)
3363 SET_CMP_FUNC(rd) 3484 SET_CMP_FUNC(rd)
3364 SET_CMP_FUNC(bit) 3485 SET_CMP_FUNC(bit)
3365 c->vsad[0]= vsad16_c; 3486 c->vsad[0]= vsad16_c;
3366 c->vsad[4]= vsad_intra16_c; 3487 c->vsad[4]= vsad_intra16_c;
3367 c->vsse[0]= vsse16_c; 3488 c->vsse[0]= vsse16_c;
3368 c->vsse[4]= vsse_intra16_c; 3489 c->vsse[4]= vsse_intra16_c;
3369 c->nsse[0]= nsse16_c; 3490 c->nsse[0]= nsse16_c;
3370 c->nsse[1]= nsse8_c; 3491 c->nsse[1]= nsse8_c;
3371 3492 c->w53[0]= w53_16_c;
3493 c->w53[1]= w53_8_c;
3494 c->w97[0]= w97_16_c;
3495 c->w97[1]= w97_8_c;
3496
3372 c->add_bytes= add_bytes_c; 3497 c->add_bytes= add_bytes_c;
3373 c->diff_bytes= diff_bytes_c; 3498 c->diff_bytes= diff_bytes_c;
3374 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; 3499 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
3375 c->bswap_buf= bswap_buf; 3500 c->bswap_buf= bswap_buf;
3376 3501