comparison snow.c @ 2223:b26474e72d6d libavcodec

Use the H.264 MC code if possible; predict_plane() is 50% faster if mmx2/3dnow is available; 0.1% bitrate increase.
author michael
date Sun, 12 Sep 2004 21:32:36 +0000
parents 3543987dccad
children 11d54cb7ac4e
comparison
equal deleted inserted replaced
2222:ef568cc0972c 2223:b26474e72d6d
1986 tmp += stride; 1986 tmp += stride;
1987 } 1987 }
1988 STOP_TIMER("mc_block") 1988 STOP_TIMER("mc_block")
1989 } 1989 }
1990 1990
1991 #define mcb(dx,dy,b_w)\
1992 static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
1993 uint8_t tmp[stride*(b_w+5)];\
1994 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1995 }
1996
1997 mcb( 0, 0,16)
1998 mcb( 4, 0,16)
1999 mcb( 8, 0,16)
2000 mcb(12, 0,16)
2001 mcb( 0, 4,16)
2002 mcb( 4, 4,16)
2003 mcb( 8, 4,16)
2004 mcb(12, 4,16)
2005 mcb( 0, 8,16)
2006 mcb( 4, 8,16)
2007 mcb( 8, 8,16)
2008 mcb(12, 8,16)
2009 mcb( 0,12,16)
2010 mcb( 4,12,16)
2011 mcb( 8,12,16)
2012 mcb(12,12,16)
2013
2014 #define mca(dx,dy,b_w)\ 1991 #define mca(dx,dy,b_w)\
2015 static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\ 1992 static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2016 uint8_t tmp[stride*(b_w+5)];\ 1993 uint8_t tmp[stride*(b_w+5)];\
2017 assert(h==b_w);\ 1994 assert(h==b_w);\
2018 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\ 1995 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2034 } 2011 }
2035 }else{ 2012 }else{
2036 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; 2013 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2037 int mx= block->mx*scale; 2014 int mx= block->mx*scale;
2038 int my= block->my*scale; 2015 int my= block->my*scale;
2016 const int dx= mx&15;
2017 const int dy= my&15;
2039 sx += (mx>>4) - 2; 2018 sx += (mx>>4) - 2;
2040 sy += (my>>4) - 2; 2019 sy += (my>>4) - 2;
2041 src += sx + sy*stride; 2020 src += sx + sy*stride;
2042 if( (unsigned)sx >= w - b_w - 4 2021 if( (unsigned)sx >= w - b_w - 4
2043 || (unsigned)sy >= h - b_h - 4){ 2022 || (unsigned)sy >= h - b_h - 4){
2044 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); 2023 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2045 src= tmp + MB_SIZE; 2024 src= tmp + MB_SIZE;
2046 } 2025 }
2047 mc_block(dst, src, tmp, stride, b_w, b_h, mx&15, my&15); 2026 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2027 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2028 else
2029 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2048 } 2030 }
2049 } 2031 }
2050 2032
2051 static always_inline int same_block(BlockNode *a, BlockNode *b){ 2033 static always_inline int same_block(BlockNode *a, BlockNode *b){
2052 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type); 2034 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2462 dsputil_init(&s->dsp, avctx); 2444 dsputil_init(&s->dsp, avctx);
2463 2445
2464 #define mcf(dx,dy)\ 2446 #define mcf(dx,dy)\
2465 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ 2447 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2466 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ 2448 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2467 mc_block ## dx ## dy; 2449 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];
2468 2450
2469 mcf( 0, 0) 2451 mcf( 0, 0)
2470 mcf( 4, 0) 2452 mcf( 4, 0)
2471 mcf( 8, 0) 2453 mcf( 8, 0)
2472 mcf(12, 0) 2454 mcf(12, 0)