Mercurial > libavcodec.hg
diff vp8.c @ 12241:c7f6ddcc5c01 libavcodec
VP8: optimize DC-only chroma case in the same way as luma.
Add MMX idct_dc_add4uv function for this case.
~40% faster chroma idct.
author | darkshikari |
---|---|
date | Fri, 23 Jul 2010 06:02:52 +0000 |
parents | e6ade5e849c9 |
children | a2f6d8c61b9c |
line wrap: on
line diff
```diff
--- a/vp8.c	Fri Jul 23 03:44:37 2010 +0000
+++ b/vp8.c	Fri Jul 23 06:02:52 2010 +0000
@@ -1206,7 +1206,7 @@
                         }
                     }
                 } else {
-                    s->vp8dsp.vp8_idct_dc_add4(y_dst, s->block[y], s->linesize);
+                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                 }
             }
             y_dst += 4*s->linesize;
@@ -1214,19 +1214,24 @@
     }
 
     for (ch = 0; ch < 2; ch++) {
-        if (AV_RN32A(s->non_zero_count_cache[4+ch])) {
+        uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
+        if (nnz4) {
             uint8_t *ch_dst = dst[1+ch];
-            for (y = 0; y < 2; y++) {
-                for (x = 0; x < 2; x++) {
-                    int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
-                    if (nnz) {
-                        if (nnz == 1)
-                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
-                        else
-                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+            if (nnz4&~0x01010101) {
+                for (y = 0; y < 2; y++) {
+                    for (x = 0; x < 2; x++) {
+                        int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
+                        if (nnz) {
+                            if (nnz == 1)
+                                s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+                            else
+                                s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+                        }
                     }
+                    ch_dst += 4*s->uvlinesize;
                 }
-                ch_dst += 4*s->uvlinesize;
+            } else {
+                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
             }
         }
     }
```