Mercurial > libavcodec.hg
comparison vp8.c @ 12241:c7f6ddcc5c01 libavcodec
VP8: optimize DC-only chroma case in the same way as luma.
Add MMX idct_dc_add4uv function for this case.
~40% faster chroma idct.
author | darkshikari |
---|---|
date | Fri, 23 Jul 2010 06:02:52 +0000 |
parents | e6ade5e849c9 |
children | a2f6d8c61b9c |
comparison
equal
deleted
inserted
replaced
12240:e6ade5e849c9 | 12241:c7f6ddcc5c01 |
---|---|
1204 else | 1204 else |
1205 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); | 1205 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); |
1206 } | 1206 } |
1207 } | 1207 } |
1208 } else { | 1208 } else { |
1209 s->vp8dsp.vp8_idct_dc_add4(y_dst, s->block[y], s->linesize); | 1209 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); |
1210 } | 1210 } |
1211 } | 1211 } |
1212 y_dst += 4*s->linesize; | 1212 y_dst += 4*s->linesize; |
1213 } | 1213 } |
1214 } | 1214 } |
1215 | 1215 |
1216 for (ch = 0; ch < 2; ch++) { | 1216 for (ch = 0; ch < 2; ch++) { |
1217 if (AV_RN32A(s->non_zero_count_cache[4+ch])) { | 1217 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]); |
| | 1218 if (nnz4) { |
1218 uint8_t *ch_dst = dst[1+ch]; | 1219 uint8_t *ch_dst = dst[1+ch]; |
1219 for (y = 0; y < 2; y++) { | 1220 if (nnz4&~0x01010101) { |
1220 for (x = 0; x < 2; x++) { | 1221 for (y = 0; y < 2; y++) { |
1221 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; | 1222 for (x = 0; x < 2; x++) { |
1222 if (nnz) { | 1223 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; |
1223 if (nnz == 1) | 1224 if (nnz) { |
1224 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); | 1225 if (nnz == 1) |
1225 else | 1226 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
1226 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); | 1227 else |
| | 1228 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
| | 1229 } |
1227 } | 1230 } |
| | 1231 ch_dst += 4*s->uvlinesize; |
1228 } | 1232 } |
1229 ch_dst += 4*s->uvlinesize; | 1233 } else { |
| | 1234 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize); |
1230 } | 1235 } |
1231 } | 1236 } |
1232 } | 1237 } |
1233 } | 1238 } |
1234 | 1239 |