comparison vc1dsp.c @ 9859:7a116de63777 libavcodec

idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall. Includes mmx2 asm for the various functions. Note that the actual idct still does not have an x86 SIMD implementation. For wmv3 files using regular idct, the decoder just falls back to simple_idct, since simple_idct_dc doesn't exist (yet).
author darkshikari
date Tue, 16 Jun 2009 09:00:55 +0000
parents 3970fe47fea3
children bf309c7ce615
comparison
equal deleted inserted replaced
9858:53d5914a30ef 9859:7a116de63777
176 vc1_loop_filter(src, stride, 1, 16, pq); 176 vc1_loop_filter(src, stride, 1, 16, pq);
177 } 177 }
178 178
179 /** Do inverse transform on 8x8 block 179 /** Do inverse transform on 8x8 block
180 */ 180 */
181 static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
182 {
183 int i;
184 int dc = block[0];
185 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
186 dc = (3 * dc + 1) >> 1;
187 dc = (3 * dc + 16) >> 5;
188 for(i = 0; i < 8; i++){
189 dest[0] = cm[dest[0]+dc];
190 dest[1] = cm[dest[1]+dc];
191 dest[2] = cm[dest[2]+dc];
192 dest[3] = cm[dest[3]+dc];
193 dest[4] = cm[dest[4]+dc];
194 dest[5] = cm[dest[5]+dc];
195 dest[6] = cm[dest[6]+dc];
196 dest[7] = cm[dest[7]+dc];
197 dest += linesize;
198 }
199 }
200
181 static void vc1_inv_trans_8x8_c(DCTELEM block[64]) 201 static void vc1_inv_trans_8x8_c(DCTELEM block[64])
182 { 202 {
183 int i; 203 int i;
184 register int t1,t2,t3,t4,t5,t6,t7,t8; 204 register int t1,t2,t3,t4,t5,t6,t7,t8;
185 DCTELEM *src, *dst; 205 DCTELEM *src, *dst;
247 } 267 }
248 } 268 }
249 269
250 /** Do inverse transform on 8x4 part of block 270 /** Do inverse transform on 8x4 part of block
251 */ 271 */
272 static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
273 {
274 int i;
275 int dc = block[0];
276 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
277 dc = ( 3 * dc + 1) >> 1;
278 dc = (17 * dc + 64) >> 7;
279 for(i = 0; i < 4; i++){
280 dest[0] = cm[dest[0]+dc];
281 dest[1] = cm[dest[1]+dc];
282 dest[2] = cm[dest[2]+dc];
283 dest[3] = cm[dest[3]+dc];
284 dest[4] = cm[dest[4]+dc];
285 dest[5] = cm[dest[5]+dc];
286 dest[6] = cm[dest[6]+dc];
287 dest[7] = cm[dest[7]+dc];
288 dest += linesize;
289 }
290 }
291
252 static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) 292 static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
253 { 293 {
254 int i; 294 int i;
255 register int t1,t2,t3,t4,t5,t6,t7,t8; 295 register int t1,t2,t3,t4,t5,t6,t7,t8;
256 DCTELEM *src, *dst; 296 DCTELEM *src, *dst;
304 } 344 }
305 } 345 }
306 346
307 /** Do inverse transform on 4x8 parts of block 347 /** Do inverse transform on 4x8 parts of block
308 */ 348 */
349 static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
350 {
351 int i;
352 int dc = block[0];
353 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
354 dc = (17 * dc + 4) >> 3;
355 dc = (12 * dc + 64) >> 7;
356 for(i = 0; i < 8; i++){
357 dest[0] = cm[dest[0]+dc];
358 dest[1] = cm[dest[1]+dc];
359 dest[2] = cm[dest[2]+dc];
360 dest[3] = cm[dest[3]+dc];
361 dest += linesize;
362 }
363 }
364
309 static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) 365 static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
310 { 366 {
311 int i; 367 int i;
312 register int t1,t2,t3,t4,t5,t6,t7,t8; 368 register int t1,t2,t3,t4,t5,t6,t7,t8;
313 DCTELEM *src, *dst; 369 DCTELEM *src, *dst;
361 } 417 }
362 } 418 }
363 419
364 /** Do inverse transform on 4x4 part of block 420 /** Do inverse transform on 4x4 part of block
365 */ 421 */
422 static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
423 {
424 int i;
425 int dc = block[0];
426 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
427 dc = (17 * dc + 4) >> 3;
428 dc = (17 * dc + 64) >> 7;
429 for(i = 0; i < 4; i++){
430 dest[0] = cm[dest[0]+dc];
431 dest[1] = cm[dest[1]+dc];
432 dest[2] = cm[dest[2]+dc];
433 dest[3] = cm[dest[3]+dc];
434 dest += linesize;
435 }
436 }
437
366 static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) 438 static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
367 { 439 {
368 int i; 440 int i;
369 register int t1,t2,t3,t4; 441 register int t1,t2,t3,t4;
370 DCTELEM *src, *dst; 442 DCTELEM *src, *dst;
543 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { 615 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
544 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; 616 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
545 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; 617 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
546 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; 618 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
547 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; 619 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
620 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c;
621 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c;
622 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c;
623 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
548 dsp->vc1_h_overlap = vc1_h_overlap_c; 624 dsp->vc1_h_overlap = vc1_h_overlap_c;
549 dsp->vc1_v_overlap = vc1_v_overlap_c; 625 dsp->vc1_v_overlap = vc1_v_overlap_c;
550 dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c; 626 dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c;
551 dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c; 627 dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c;
552 dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c; 628 dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c;