comparison msmpeg4.c @ 225:ae145876789d libavcodec

Use multiplies instead of divides for DC prediction on x86
author michaelni
date Tue, 05 Feb 2002 22:51:23 +0000
parents fe243b4aec02
children 3c2bad70a196
comparison
equal deleted inserted replaced
224:8b3e70afa2ba 225:ae145876789d
48 int n, int coded); 48 int n, int coded);
49 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); 49 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
50 static int msmpeg4_decode_motion(MpegEncContext * s, 50 static int msmpeg4_decode_motion(MpegEncContext * s,
51 int *mx_ptr, int *my_ptr); 51 int *mx_ptr, int *my_ptr);
52 52
53 extern UINT32 inverse[256];
54
53 #ifdef DEBUG 55 #ifdef DEBUG
54 int intra_count = 0; 56 int intra_count = 0;
55 int frame_count = 0; 57 int frame_count = 0;
56 #endif 58 #endif
57 /* XXX: move it to mpegvideo.h */ 59 /* XXX: move it to mpegvideo.h */
436 /* XXX: the following solution consumes divisions, but it does not 438 /* XXX: the following solution consumes divisions, but it does not
437 necessitate to modify mpegvideo.c. The problem comes from the 439 necessitate to modify mpegvideo.c. The problem comes from the
438 fact they decided to store the quantized DC (which would lead 440 fact they decided to store the quantized DC (which would lead
439 to problems if Q could vary !) */ 441 to problems if Q could vary !) */
440 #if defined ARCH_X86 && !defined PIC 442 #if defined ARCH_X86 && !defined PIC
441 /* using 16bit divisions as they are large enough and 2x as fast */
442 asm volatile( 443 asm volatile(
443 "movl %3, %%eax \n\t" 444 "movl %3, %%eax \n\t"
444 "shrl $1, %%eax \n\t" 445 "shrl $1, %%eax \n\t"
445 "addl %%eax, %2 \n\t" 446 "addl %%eax, %2 \n\t"
446 "addl %%eax, %1 \n\t" 447 "addl %%eax, %1 \n\t"
447 "addl %0, %%eax \n\t" 448 "addl %0, %%eax \n\t"
448 "xorl %%edx, %%edx \n\t" 449 "mull %4 \n\t"
449 "divw %w3 \n\t" 450 "movl %%edx, %0 \n\t"
450 "movzwl %%ax, %0 \n\t"
451 "movl %1, %%eax \n\t" 451 "movl %1, %%eax \n\t"
452 "xorl %%edx, %%edx \n\t" 452 "mull %4 \n\t"
453 "divw %w3 \n\t" 453 "movl %%edx, %1 \n\t"
454 "movzwl %%ax, %1 \n\t"
455 "movl %2, %%eax \n\t" 454 "movl %2, %%eax \n\t"
456 "xorl %%edx, %%edx \n\t" 455 "mull %4 \n\t"
457 "divw %w3 \n\t" 456 "movl %%edx, %2 \n\t"
458 "movzwl %%ax, %2 \n\t" 457 : "+r" (a), "+r" (b), "+r" (c)
459 : "+r" (a), "+r" (b), "+r" (c) 458 : "g" (scale), "r" (inverse[scale])
460 : "r" (scale)
461 : "%eax", "%edx" 459 : "%eax", "%edx"
462 ); 460 );
463 #else 461 #else
464 /* #elif defined (ARCH_ALPHA) */ 462 /* #elif defined (ARCH_ALPHA) */
465 /* Divisions are extremely costly on Alpha; optimize the most 463 /* Divisions are extremely costly on Alpha; optimize the most