Mercurial > libavcodec.hg
comparison ppc/mathops.h @ 9625:7c4ab94a82d8 libavcodec
PPC: 32-bit asm for MAC64 and MLS64
GCC makes a mess of these operations, so give it a hand.
55% faster MP3 decoding on G4.
author | mru |
---|---|
date | Mon, 11 May 2009 02:41:50 +0000 |
parents | 182f35c8eaed |
children | d762f9979193 |
comparison
equal
deleted
inserted
replaced
9624:777deb3dc12b | 9625:7c4ab94a82d8 |
---|---|
42 int r; | 42 int r; |
43 __asm__ ("mulhw %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); | 43 __asm__ ("mulhw %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); |
44 return r; | 44 return r; |
45 } | 45 } |
46 | 46 |
47 #if !HAVE_PPC64 | |
48 static inline av_const int64_t MAC64(int64_t d, int a, int b) /* 32-bit PPC multiply-accumulate: returns d + a*b, where a*b is the full 64-bit signed product */ | |
49 { | |
50 union { uint64_t x; unsigned hl[2]; } x = { d }; /* hl[] overlays d as two 32-bit words; the asm uses hl[0] as the high word and hl[1] as the low word, which presumably relies on big-endian layout - TODO confirm */ | |
51 int h, l; /* scratch registers: h receives the high word of a*b, l the low word */ | |
52 __asm__ ("mullw %3, %4, %5 \n\t" /* l = low 32 bits of a*b */ | |
53 "mulhw %2, %4, %5 \n\t" /* h = high 32 bits of the signed product a*b */ | |
54 "addc %1, %1, %3 \n\t" /* low word += l, recording the carry */ | |
55 "adde %0, %0, %2 \n\t" /* high word += h + carry from the low-word add */ | |
56 : "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l) /* both halves are read-write; h and l are early-clobber because l is written before a and b are read for the last time */ | |
57 : "r"(a), "r"(b)); /* a and b are input-only register operands */ | |
58 return x.x; /* the two updated words read back as one 64-bit value */ | |
59 } | |
60 #define MAC64(d, a, b) ((d) = MAC64(d, a, b)) /* in-place form: stores the result back into d; the inner MAC64 is not re-expanded by the preprocessor, so it calls the function above; note d is evaluated twice */ | |
61 | |
62 static inline av_const int64_t MLS64(int64_t d, int a, int b) /* 32-bit PPC multiply-subtract: returns d - a*b, where a*b is the full 64-bit signed product */ | |
63 { | |
64 union { uint64_t x; unsigned hl[2]; } x = { d }; /* hl[] overlays d as two 32-bit words; the asm uses hl[0] as the high word and hl[1] as the low word, which presumably relies on big-endian layout - TODO confirm */ | |
65 int h, l; /* scratch registers: h receives the high word of a*b, l the low word */ | |
66 __asm__ ("mullw %3, %4, %5 \n\t" /* l = low 32 bits of a*b */ | |
67 "mulhw %2, %4, %5 \n\t" /* h = high 32 bits of the signed product a*b */ | |
68 "subfc %1, %3, %1 \n\t" /* subtract-from: low word = low word - l, recording the carry (borrow) */ | |
69 "subfe %0, %2, %0 \n\t" /* high word = high word - h, adjusted by the borrow from the low-word subtract */ | |
70 : "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l) /* both halves are read-write; h and l are early-clobber because l is written before a and b are read for the last time */ | |
71 : "r"(a), "r"(b)); /* a and b are input-only register operands */ | |
72 return x.x; /* the two updated words read back as one 64-bit value */ | |
73 } | |
74 #define MLS64(d, a, b) ((d) = MLS64(d, a, b)) /* in-place form: stores the result back into d; the inner MLS64 is not re-expanded by the preprocessor, so it calls the function above; note d is evaluated twice */ | |
75 #endif | |
76 | |
47 #endif /* AVCODEC_PPC_MATHOPS_H */ | 77 #endif /* AVCODEC_PPC_MATHOPS_H */ |