comparison vp6.c @ 4921:9a2c08b939ee libavcodec

faster and simpler vp6 bilinear mc
author lorenm
date Sun, 06 May 2007 06:43:33 +0000
parents 143b89ab8187
children 58f751086434
comparison legend: equal | deleted | inserted | replaced
4920:6258f497d42e 4921:9a2c08b939ee
378 src += 2*stride; 378 src += 2*stride;
379 } 379 }
380 return (16*square_sum - sum*sum) >> 8; 380 return (16*square_sum - sum*sum) >> 8;
381 } 381 }
382 382
383 static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
384 int stride, int delta, int16_t weight)
385 {
386 s->dsp.put_pixels_tab[1][0](dst, src, stride, 8);
387 s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2,
388 8-weight, weight, 0);
389 }
390
391 static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride, 383 static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride,
392 int delta, const int16_t *weights) 384 int delta, const int16_t *weights)
393 { 385 {
394 int x, y; 386 int x, y;
395 387
407 399
408 static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src, 400 static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
409 int stride, int h_weight, int v_weight) 401 int stride, int h_weight, int v_weight)
410 { 402 {
411 uint8_t *tmp = s->edge_emu_buffer+16; 403 uint8_t *tmp = s->edge_emu_buffer+16;
412 int x, xmax; 404 s->dsp.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0);
413 405 s->dsp.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight);
414 s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8);
415 s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2,
416 8-h_weight, h_weight, 0);
417 /* we need a 8x9 block to do vertical filter, so compute one more line */
418 for (x=8*stride, xmax=x+8; x<xmax; x++)
419 tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3;
420
421 s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8);
422 s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2,
423 8-v_weight, v_weight, 0);
424 } 406 }
425 407
426 static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride, 408 static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride,
427 const int16_t *h_weights,const int16_t *v_weights) 409 const int16_t *h_weights,const int16_t *v_weights)
428 { 410 {
500 vp6_filter_diag4(dst, src+offset1, stride, 482 vp6_filter_diag4(dst, src+offset1, stride,
501 vp6_block_copy_filter[select][x8], 483 vp6_block_copy_filter[select][x8],
502 vp6_block_copy_filter[select][y8]); 484 vp6_block_copy_filter[select][y8]);
503 } 485 }
504 } else { 486 } else {
505 if (!y8) { /* left or right combine */ 487 if (!x8 || !y8) {
506 vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8); 488 s->dsp.put_h264_chroma_pixels_tab[0](dst, src+offset1, stride, 8, x8, y8);
507 } else if (!x8) { /* above or below combine */
508 vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8);
509 } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ 489 } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */
510 vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8); 490 vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8);
511 } else { /* lower-right or upper-left combine */ 491 } else { /* lower-right or upper-left combine */
512 vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8); 492 vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8);
513 } 493 }