Mercurial > libavcodec.hg
comparison mpegaudiodec.c @ 3603:42b6cefc6c1a libavcodec
replacing MULH by asm for x86
about 30% faster imdct36()
author | michael |
---|---|
date | Tue, 22 Aug 2006 11:51:09 +0000 |
parents | 38b7b3629249 |
children | dad0296d4522 |
comparison
equal
deleted
inserted
replaced
3602:38b7b3629249 | 3603:42b6cefc6c1a |
---|---|
41 | 41 |
42 #include "mpegaudio.h" | 42 #include "mpegaudio.h" |
43 | 43 |
44 #define FRAC_ONE (1 << FRAC_BITS) | 44 #define FRAC_ONE (1 << FRAC_BITS) |
45 | 45 |
46 #define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS) | 46 #ifdef ARCH_X86 |
47 #define MUL64(a,b) ((int64_t)(a) * (int64_t)(b)) | 47 # define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS) |
48 # define MUL64(ra, rb) \ | |
49 ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; }) | |
50 # define MULH(ra, rb) \ | |
51 ({ int rt, dummy; asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb)); rt; }) | |
52 #else | |
53 # define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS) | |
54 # define MUL64(a,b) ((int64_t)(a) * (int64_t)(b)) | |
55 //#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this | |
56 static always_inline int MULH(int a, int b){ | |
57 return ((int64_t)(a) * (int64_t)(b))>>32; | |
58 } | |
59 #endif | |
48 #define FIX(a) ((int)((a) * FRAC_ONE)) | 60 #define FIX(a) ((int)((a) * FRAC_ONE)) |
49 /* WARNING: only correct for positive numbers */ | 61 /* WARNING: only correct for positive numbers */ |
50 #define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) | 62 #define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) |
51 #define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS) | 63 #define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS) |
52 | 64 |
53 #define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) | 65 #define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) |
54 //#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this | |
55 static always_inline int MULH(int a, int b){ | |
56 return ((int64_t)(a) * (int64_t)(b))>>32; | |
57 } | |
58 | 66 |
59 /****************/ | 67 /****************/ |
60 | 68 |
61 #define HEADER_SIZE 4 | 69 #define HEADER_SIZE 4 |
62 #define BACKSTEP_SIZE 512 | 70 #define BACKSTEP_SIZE 512 |
777 else if (sum1 > OUT_MAX) | 785 else if (sum1 > OUT_MAX) |
778 sum1 = OUT_MAX; | 786 sum1 = OUT_MAX; |
779 return sum1; | 787 return sum1; |
780 } | 788 } |
781 | 789 |
782 # ifdef ARCH_X86 | 790 # define MULS(ra, rb) MUL64(ra, rb) |
783 /* ask gcc devels why this is 3 times faster than the generic code below */ | |
784 # define MULS(ra, rb) \ | |
785 ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; }) | |
786 # else | |
787 # define MULS(ra, rb) MUL64(ra, rb) | |
788 # endif | |
789 #endif | 791 #endif |
790 | 792 |
791 #define SUM8(sum, op, w, p) \ | 793 #define SUM8(sum, op, w, p) \ |
792 { \ | 794 { \ |
793 sum op MULS((w)[0 * 64], p[0 * 64]);\ | 795 sum op MULS((w)[0 * 64], p[0 * 64]);\ |