comparison vp8.c @ 12241:c7f6ddcc5c01 libavcodec

VP8: optimize the DC-only chroma case in the same way as luma. Add an MMX idct_dc_add4uv function for this case. The chroma IDCT is ~40% faster as a result.
author darkshikari
date Fri, 23 Jul 2010 06:02:52 +0000
parents e6ade5e849c9
children a2f6d8c61b9c
comparison
legend: equal, deleted, inserted, replaced
12240:e6ade5e849c9 12241:c7f6ddcc5c01
1204 else 1204 else
1205 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); 1205 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1206 } 1206 }
1207 } 1207 }
1208 } else { 1208 } else {
1209 s->vp8dsp.vp8_idct_dc_add4(y_dst, s->block[y], s->linesize); 1209 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1210 } 1210 }
1211 } 1211 }
1212 y_dst += 4*s->linesize; 1212 y_dst += 4*s->linesize;
1213 } 1213 }
1214 } 1214 }
1215 1215
1216 for (ch = 0; ch < 2; ch++) { 1216 for (ch = 0; ch < 2; ch++) {
1217 if (AV_RN32A(s->non_zero_count_cache[4+ch])) { 1217 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
1218 if (nnz4) {
1218 uint8_t *ch_dst = dst[1+ch]; 1219 uint8_t *ch_dst = dst[1+ch];
1219 for (y = 0; y < 2; y++) { 1220 if (nnz4&~0x01010101) {
1220 for (x = 0; x < 2; x++) { 1221 for (y = 0; y < 2; y++) {
1221 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; 1222 for (x = 0; x < 2; x++) {
1222 if (nnz) { 1223 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
1223 if (nnz == 1) 1224 if (nnz) {
1224 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); 1225 if (nnz == 1)
1225 else 1226 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1226 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); 1227 else
1228 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1229 }
1227 } 1230 }
1231 ch_dst += 4*s->uvlinesize;
1228 } 1232 }
1229 ch_dst += 4*s->uvlinesize; 1233 } else {
1234 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
1230 } 1235 }
1231 } 1236 }
1232 } 1237 }
1233 } 1238 }
1234 1239