Mercurial > libavcodec.hg
comparison vp8dsp.c @ 12238:1a7903913e9b libavcodec
VP8: 30% faster idct_mb
Take shortcuts based on statistically common situations.
Add 4-at-a-time idct_dc function (mmx and sse2) since rows of 4 DC-only DCT
blocks are common.
TODO: tie this more directly into the MB mode, since the DC-level transform is
only used for non-splitmv blocks?
author | darkshikari |
---|---|
date | Fri, 23 Jul 2010 02:58:27 +0000 |
parents | e08d65897115 |
children | c7f6ddcc5c01 |
comparison
equal
deleted
inserted
replaced
12237:f0c4dc49c8f1 | 12238:1a7903913e9b |
---|---|
107 dst[3] = cm[dst[3]]; | 107 dst[3] = cm[dst[3]]; |
108 dst += stride; | 108 dst += stride; |
109 } | 109 } |
110 } | 110 } |
111 | 111 |
112 static void vp8_idct_dc_add4_c(uint8_t *dst, DCTELEM block[4][16], int stride) | |
113 { | |
114 int i, j; | |
115 for (j = 0; j < 4; j++) { | |
116 uint8_t *pix = dst+j*4; | |
117 int dc = (block[j][0] + 4) >> 3; | |
118 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; | |
119 block[j][0] = 0; | |
120 if (!dc) | |
121 continue; | |
122 for (i = 0; i < 4; i++) { | |
123 pix[0] = cm[pix[0]]; | |
124 pix[1] = cm[pix[1]]; | |
125 pix[2] = cm[pix[2]]; | |
126 pix[3] = cm[pix[3]]; | |
127 pix += stride; | |
128 } | |
129 } | |
130 } | |
112 | 131 |
113 // because I like only having two parameters to pass functions... | 132 // because I like only having two parameters to pass functions... |
114 #define LOAD_PIXELS\ | 133 #define LOAD_PIXELS\ |
115 int av_unused p3 = p[-4*stride];\ | 134 int av_unused p3 = p[-4*stride];\ |
116 int av_unused p2 = p[-3*stride];\ | 135 int av_unused p2 = p[-3*stride];\ |
458 dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ | 477 dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ |
459 dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c | 478 dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c |
460 | 479 |
461 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) | 480 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
462 { | 481 { |
463 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; | 482 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
464 dsp->vp8_idct_add = vp8_idct_add_c; | 483 dsp->vp8_idct_add = vp8_idct_add_c; |
465 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; | 484 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
485 dsp->vp8_idct_dc_add4 = vp8_idct_dc_add4_c; | |
466 | 486 |
467 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; | 487 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
468 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; | 488 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
469 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; | 489 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
470 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; | 490 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |