Mercurial > libavcodec.hg
comparison vc1dsp.c @ 9859:7a116de63777 libavcodec
idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.
Includes mmx2 asm for the various functions.
Note that the actual idct still does not have an x86 SIMD implementation.
For wmv3 files using regular idct, the decoder just falls back to simple_idct,
since simple_idct_dc doesn't exist (yet).
author | darkshikari |
---|---|
date | Tue, 16 Jun 2009 09:00:55 +0000 |
parents | 3970fe47fea3 |
children | bf309c7ce615 |
comparison
equal
deleted
inserted
replaced
9858:53d5914a30ef | 9859:7a116de63777 |
---|---|
176 vc1_loop_filter(src, stride, 1, 16, pq); | 176 vc1_loop_filter(src, stride, 1, 16, pq); |
177 } | 177 } |
178 | 178 |
179 /** Do inverse transform on 8x8 block | 179 /** Do inverse transform on 8x8 block |
180 */ | 180 */ |
181 static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |
182 { | |
183 int i; | |
184 int dc = block[0]; | |
185 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
186 dc = (3 * dc + 1) >> 1; | |
187 dc = (3 * dc + 16) >> 5; | |
188 for(i = 0; i < 8; i++){ | |
189 dest[0] = cm[dest[0]+dc]; | |
190 dest[1] = cm[dest[1]+dc]; | |
191 dest[2] = cm[dest[2]+dc]; | |
192 dest[3] = cm[dest[3]+dc]; | |
193 dest[4] = cm[dest[4]+dc]; | |
194 dest[5] = cm[dest[5]+dc]; | |
195 dest[6] = cm[dest[6]+dc]; | |
196 dest[7] = cm[dest[7]+dc]; | |
197 dest += linesize; | |
198 } | |
199 } | |
200 | |
181 static void vc1_inv_trans_8x8_c(DCTELEM block[64]) | 201 static void vc1_inv_trans_8x8_c(DCTELEM block[64]) |
182 { | 202 { |
183 int i; | 203 int i; |
184 register int t1,t2,t3,t4,t5,t6,t7,t8; | 204 register int t1,t2,t3,t4,t5,t6,t7,t8; |
185 DCTELEM *src, *dst; | 205 DCTELEM *src, *dst; |
247 } | 267 } |
248 } | 268 } |
249 | 269 |
250 /** Do inverse transform on 8x4 part of block | 270 /** Do inverse transform on 8x4 part of block |
251 */ | 271 */ |
272 static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |
273 { | |
274 int i; | |
275 int dc = block[0]; | |
276 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
277 dc = ( 3 * dc + 1) >> 1; | |
278 dc = (17 * dc + 64) >> 7; | |
279 for(i = 0; i < 4; i++){ | |
280 dest[0] = cm[dest[0]+dc]; | |
281 dest[1] = cm[dest[1]+dc]; | |
282 dest[2] = cm[dest[2]+dc]; | |
283 dest[3] = cm[dest[3]+dc]; | |
284 dest[4] = cm[dest[4]+dc]; | |
285 dest[5] = cm[dest[5]+dc]; | |
286 dest[6] = cm[dest[6]+dc]; | |
287 dest[7] = cm[dest[7]+dc]; | |
288 dest += linesize; | |
289 } | |
290 } | |
291 | |
252 static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) | 292 static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) |
253 { | 293 { |
254 int i; | 294 int i; |
255 register int t1,t2,t3,t4,t5,t6,t7,t8; | 295 register int t1,t2,t3,t4,t5,t6,t7,t8; |
256 DCTELEM *src, *dst; | 296 DCTELEM *src, *dst; |
304 } | 344 } |
305 } | 345 } |
306 | 346 |
307 /** Do inverse transform on 4x8 parts of block | 347 /** Do inverse transform on 4x8 parts of block |
308 */ | 348 */ |
349 static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |
350 { | |
351 int i; | |
352 int dc = block[0]; | |
353 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
354 dc = (17 * dc + 4) >> 3; | |
355 dc = (12 * dc + 64) >> 7; | |
356 for(i = 0; i < 8; i++){ | |
357 dest[0] = cm[dest[0]+dc]; | |
358 dest[1] = cm[dest[1]+dc]; | |
359 dest[2] = cm[dest[2]+dc]; | |
360 dest[3] = cm[dest[3]+dc]; | |
361 dest += linesize; | |
362 } | |
363 } | |
364 | |
309 static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) | 365 static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) |
310 { | 366 { |
311 int i; | 367 int i; |
312 register int t1,t2,t3,t4,t5,t6,t7,t8; | 368 register int t1,t2,t3,t4,t5,t6,t7,t8; |
313 DCTELEM *src, *dst; | 369 DCTELEM *src, *dst; |
361 } | 417 } |
362 } | 418 } |
363 | 419 |
364 /** Do inverse transform on 4x4 part of block | 420 /** Do inverse transform on 4x4 part of block |
365 */ | 421 */ |
422 static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) | |
423 { | |
424 int i; | |
425 int dc = block[0]; | |
426 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |
427 dc = (17 * dc + 4) >> 3; | |
428 dc = (17 * dc + 64) >> 7; | |
429 for(i = 0; i < 4; i++){ | |
430 dest[0] = cm[dest[0]+dc]; | |
431 dest[1] = cm[dest[1]+dc]; | |
432 dest[2] = cm[dest[2]+dc]; | |
433 dest[3] = cm[dest[3]+dc]; | |
434 dest += linesize; | |
435 } | |
436 } | |
437 | |
366 static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) | 438 static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) |
367 { | 439 { |
368 int i; | 440 int i; |
369 register int t1,t2,t3,t4; | 441 register int t1,t2,t3,t4; |
370 DCTELEM *src, *dst; | 442 DCTELEM *src, *dst; |
543 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { | 615 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { |
544 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; | 616 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; |
545 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; | 617 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; |
546 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; | 618 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; |
547 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; | 619 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c; |
620 dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c; | |
621 dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c; | |
622 dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c; | |
623 dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c; | |
548 dsp->vc1_h_overlap = vc1_h_overlap_c; | 624 dsp->vc1_h_overlap = vc1_h_overlap_c; |
549 dsp->vc1_v_overlap = vc1_v_overlap_c; | 625 dsp->vc1_v_overlap = vc1_v_overlap_c; |
550 dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c; | 626 dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c; |
551 dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c; | 627 dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c; |
552 dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c; | 628 dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c; |