comparison mpegaudiodec.c @ 3603:42b6cefc6c1a libavcodec

replacing MULH by asm for x86; makes imdct36() about 30% faster
author michael
date Tue, 22 Aug 2006 11:51:09 +0000
parents 38b7b3629249
children dad0296d4522
comparison
equal deleted inserted replaced
3602:38b7b3629249 3603:42b6cefc6c1a
41 41
42 #include "mpegaudio.h" 42 #include "mpegaudio.h"
43 43
44 #define FRAC_ONE (1 << FRAC_BITS) 44 #define FRAC_ONE (1 << FRAC_BITS)
45 45
46 #define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS) 46 #ifdef ARCH_X86
47 #define MUL64(a,b) ((int64_t)(a) * (int64_t)(b)) 47 # define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
48 # define MUL64(ra, rb) \
49 ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; })
50 # define MULH(ra, rb) \
51 ({ int rt, dummy; asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb)); rt; })
52 #else
53 # define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
54 # define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
55 //#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this
56 static always_inline int MULH(int a, int b){
57 return ((int64_t)(a) * (int64_t)(b))>>32;
58 }
59 #endif
48 #define FIX(a) ((int)((a) * FRAC_ONE)) 60 #define FIX(a) ((int)((a) * FRAC_ONE))
49 /* WARNING: only correct for positive numbers */ 61 /* WARNING: only correct for positive numbers */
50 #define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) 62 #define FIXR(a) ((int)((a) * FRAC_ONE + 0.5))
51 #define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS) 63 #define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)
52 64
53 #define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) 65 #define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
54 //#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this
55 static always_inline int MULH(int a, int b){
56 return ((int64_t)(a) * (int64_t)(b))>>32;
57 }
58 66
59 /****************/ 67 /****************/
60 68
61 #define HEADER_SIZE 4 69 #define HEADER_SIZE 4
62 #define BACKSTEP_SIZE 512 70 #define BACKSTEP_SIZE 512
777 else if (sum1 > OUT_MAX) 785 else if (sum1 > OUT_MAX)
778 sum1 = OUT_MAX; 786 sum1 = OUT_MAX;
779 return sum1; 787 return sum1;
780 } 788 }
781 789
782 # ifdef ARCH_X86 790 # define MULS(ra, rb) MUL64(ra, rb)
783 /* ask gcc devels why this is 3 times faster than the generic code below */
784 # define MULS(ra, rb) \
785 ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; })
786 # else
787 # define MULS(ra, rb) MUL64(ra, rb)
788 # endif
789 #endif 791 #endif
790 792
791 #define SUM8(sum, op, w, p) \ 793 #define SUM8(sum, op, w, p) \
792 { \ 794 { \
793 sum op MULS((w)[0 * 64], p[0 * 64]);\ 795 sum op MULS((w)[0 * 64], p[0 * 64]);\