Mercurial > mplayer.hg
comparison liba52/resample_mmx.c @ 3574:8600f40003de
mmx opt
author | michael |
---|---|
date | Tue, 18 Dec 2001 00:56:08 +0000 |
parents | d14494d84c29 |
children | 01a2466e035e |
comparison
equal
deleted
inserted
replaced
3573:1b2ee529e7b7 | 3574:8600f40003de |
---|---|
18 #include <inttypes.h> | 18 #include <inttypes.h> |
19 #include "a52.h" | 19 #include "a52.h" |
20 #include "../config.h" | 20 #include "../config.h" |
21 | 21 |
22 #ifdef HAVE_MMX | 22 #ifdef HAVE_MMX |
23 static uint64_t __attribute__((aligned(16))) magicF2W= 0x43c0000043c00000LL; | 23 static uint64_t __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
24 static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; | |
25 static uint64_t __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; | |
26 static uint64_t __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; | |
24 #endif | 27 #endif |
25 | 28 |
26 static inline int16_t convert (int32_t i) | 29 static inline int16_t convert (int32_t i) |
27 { | 30 { |
28 if (i > 0x43c07fff) | 31 if (i > 0x43c07fff) |
46 int i; | 49 int i; |
47 int32_t * f = (int32_t *) _f; | 50 int32_t * f = (int32_t *) _f; |
48 | 51 |
49 switch (flags) { | 52 switch (flags) { |
50 case A52_MONO: | 53 case A52_MONO: |
54 #ifdef HAVE_MMX | |
55 asm volatile( | |
56 "movl $-512, %%esi \n\t" | |
57 "movq magicF2W, %%mm7 \n\t" | |
58 "movq wm1100, %%mm3 \n\t" | |
59 "movq wm0101, %%mm4 \n\t" | |
60 "movq wm1010, %%mm5 \n\t" | |
61 "pxor %%mm6, %%mm6 \n\t" | |
62 "1: \n\t" | |
63 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
64 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
65 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
66 "psubd %%mm7, %%mm0 \n\t" | |
67 "psubd %%mm7, %%mm1 \n\t" | |
68 "packssdw %%mm1, %%mm0 \n\t" | |
69 "movq %%mm0, %%mm1 \n\t" | |
70 "pand %%mm4, %%mm0 \n\t" | |
71 "pand %%mm5, %%mm1 \n\t" | |
72 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
73 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
74 "pand %%mm3, %%mm0 \n\t" | |
75 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
76 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
77 "pand %%mm3, %%mm1 \n\t" | |
78 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
79 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
80 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D | |
81 "addl $8, %%esi \n\t" | |
82 " jnz 1b \n\t" | |
83 "emms \n\t" | |
84 :: "r" (s16+1280), "r" (f+256) | |
85 :"%esi", "%edi", "memory" | |
86 ); | |
87 #else | |
51 for (i = 0; i < 256; i++) { | 88 for (i = 0; i < 256; i++) { |
52 s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; | 89 s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; |
53 s16[5*i+4] = convert (f[i]); | 90 s16[5*i+4] = convert (f[i]); |
54 } | 91 } |
92 #endif | |
55 break; | 93 break; |
56 case A52_CHANNEL: | 94 case A52_CHANNEL: |
57 case A52_STEREO: | 95 case A52_STEREO: |
58 case A52_DOLBY: | 96 case A52_DOLBY: |
59 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it | 97 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |