comparison liba52/resample_c.c @ 3574:8600f40003de

mmx opt
author michael
date Tue, 18 Dec 2001 00:56:08 +0000
parents d14494d84c29
children 01a2466e035e
comparison
equal deleted inserted replaced
3573:1b2ee529e7b7 3574:8600f40003de
18 #include <inttypes.h> 18 #include <inttypes.h>
19 #include "a52.h" 19 #include "a52.h"
20 #include "../config.h" 20 #include "../config.h"
21 21
22 #ifdef HAVE_MMX 22 #ifdef HAVE_MMX
23 static uint64_t __attribute__((aligned(16))) magicF2W= 0x43c0000043c00000LL; 23 static uint64_t __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
24 static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
25 static uint64_t __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
26 static uint64_t __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL;
24 #endif 27 #endif
25 28
26 static inline int16_t convert (int32_t i) 29 static inline int16_t convert (int32_t i)
27 { 30 {
28 if (i > 0x43c07fff) 31 if (i > 0x43c07fff)
46 int i; 49 int i;
47 int32_t * f = (int32_t *) _f; 50 int32_t * f = (int32_t *) _f;
48 51
49 switch (flags) { 52 switch (flags) {
50 case A52_MONO: 53 case A52_MONO:
54 #ifdef HAVE_MMX
55 asm volatile(
56 "movl $-512, %%esi \n\t"
57 "movq magicF2W, %%mm7 \n\t"
58 "movq wm1100, %%mm3 \n\t"
59 "movq wm0101, %%mm4 \n\t"
60 "movq wm1010, %%mm5 \n\t"
61 "pxor %%mm6, %%mm6 \n\t"
62 "1: \n\t"
63 "movq (%1, %%esi, 2), %%mm0 \n\t"
64 "movq 8(%1, %%esi, 2), %%mm1 \n\t"
65 "leal (%%esi, %%esi, 4), %%edi \n\t"
66 "psubd %%mm7, %%mm0 \n\t"
67 "psubd %%mm7, %%mm1 \n\t"
68 "packssdw %%mm1, %%mm0 \n\t"
69 "movq %%mm0, %%mm1 \n\t"
70 "pand %%mm4, %%mm0 \n\t"
71 "pand %%mm5, %%mm1 \n\t"
72 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0
73 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0
74 "pand %%mm3, %%mm0 \n\t"
75 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0
76 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B
77 "pand %%mm3, %%mm1 \n\t"
78 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0
79 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0
80 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D
81 "addl $8, %%esi \n\t"
82 " jnz 1b \n\t"
83 "emms \n\t"
84 :: "r" (s16+1280), "r" (f+256)
85 :"%esi", "%edi", "memory"
86 );
87 #else
51 for (i = 0; i < 256; i++) { 88 for (i = 0; i < 256; i++) {
52 s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; 89 s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0;
53 s16[5*i+4] = convert (f[i]); 90 s16[5*i+4] = convert (f[i]);
54 } 91 }
92 #endif
55 break; 93 break;
56 case A52_CHANNEL: 94 case A52_CHANNEL:
57 case A52_STEREO: 95 case A52_STEREO:
58 case A52_DOLBY: 96 case A52_DOLBY:
59 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it 97 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it