# HG changeset patch
# User mru
# Date 1242009710 0
# Node ID 7c4ab94a82d85eca6ed5c1d94fc9f49883635e90
# Parent 777deb3dc12b33fdc88755497bf8238f7483fc4f
PPC: 32-bit asm for MAC64 and MLS64

GCC makes a mess of these operations, so give it a hand.
55% faster MP3 decoding on G4.

diff -r 777deb3dc12b -r 7c4ab94a82d8 ppc/mathops.h
--- a/ppc/mathops.h Sun May 10 22:36:16 2009 +0000
+++ b/ppc/mathops.h Mon May 11 02:41:50 2009 +0000
@@ -44,4 +44,34 @@
     return r;
 }
 
+#if !HAVE_PPC64 /* asm versions below are used only when HAVE_PPC64 is false (32-bit build) */
+static inline av_const int64_t MAC64(int64_t d, int a, int b) /* multiply-accumulate: returns d + a*b as a 64-bit value */
+{
+    union { uint64_t x; unsigned hl[2]; } x = { d }; /* NOTE(review): the asm uses hl[0] as the high word and hl[1] as the low word, i.e. a big-endian layout -- confirm */
+    int h, l; /* scratch: high and low 32-bit halves of the product a*b */
+    __asm__ ("mullw %3, %4, %5 \n\t" /* l = low 32 bits of a*b */
+             "mulhw %2, %4, %5 \n\t" /* h = high 32 bits of the signed product a*b */
+             "addc %1, %1, %3 \n\t" /* low word += l, recording the carry-out */
+             "adde %0, %0, %2 \n\t" /* high word += h + carry from the previous add */
+             : "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l) /* '&' (earlyclobber) keeps h and l out of the registers holding a and b; l is written before mulhw reads them */
+             : "r"(a), "r"(b));
+    return x.x; /* the two updated halves read back as one 64-bit value */
+}
+#define MAC64(d, a, b) ((d) = MAC64(d, a, b)) /* in-place form: stores the result into d; the inner MAC64 is not re-expanded, so it calls the function above */
+
+static inline av_const int64_t MLS64(int64_t d, int a, int b) /* multiply-subtract: returns d - a*b as a 64-bit value */
+{
+    union { uint64_t x; unsigned hl[2]; } x = { d }; /* NOTE(review): the asm uses hl[0] as the high word and hl[1] as the low word, i.e. a big-endian layout -- confirm */
+    int h, l; /* scratch: high and low 32-bit halves of the product a*b */
+    __asm__ ("mullw %3, %4, %5 \n\t" /* l = low 32 bits of a*b */
+             "mulhw %2, %4, %5 \n\t" /* h = high 32 bits of the signed product a*b */
+             "subfc %1, %3, %1 \n\t" /* low word -= l (subfc computes third operand minus second), recording the carry */
+             "subfe %0, %2, %0 \n\t" /* high word -= h, taking the borrow from the previous subtract via the carry bit */
+             : "+r"(x.hl[0]), "+r"(x.hl[1]), "=&r"(h), "=&r"(l) /* '&' (earlyclobber) keeps h and l out of the registers holding a and b; l is written before mulhw reads them */
+             : "r"(a), "r"(b));
+    return x.x; /* the two updated halves read back as one 64-bit value */
+}
+#define MLS64(d, a, b) ((d) = MLS64(d, a, b)) /* in-place form: stores the result into d; the inner MLS64 is not re-expanded, so it calls the function above */
+#endif /* !HAVE_PPC64 */
+
 #endif /* AVCODEC_PPC_MATHOPS_H */