Mercurial > mplayer.hg
comparison liba52/resample_mmx.c @ 3567:9e1e88b3ca18
mmx opt
author | michael |
---|---|
date | Mon, 17 Dec 2001 19:33:19 +0000 |
parents | 21d65a4ae3c9 |
children | d14494d84c29 |
comparison
equal
deleted
inserted
replaced
3566:92b3028b10f3 | 3567:9e1e88b3ca18 |
---|---|
7 | 7 |
8 // a52_resample() should do the conversion. | 8 // a52_resample() should do the conversion. |
9 | 9 |
10 #include <inttypes.h> | 10 #include <inttypes.h> |
11 #include "a52.h" | 11 #include "a52.h" |
12 #include "../config.h" | |
13 | |
14 #ifdef HAVE_MMX | |
15 static uint64_t __attribute__((aligned(16))) magicF2W= 0x43c0000043c00000LL; | |
16 #endif | |
12 | 17 |
13 static inline int16_t convert (int32_t i) | 18 static inline int16_t convert (int32_t i) |
14 { | 19 { |
15 if (i > 0x43c07fff) | 20 if (i > 0x43c07fff) |
16 return 32767; | 21 return 32767; |
41 } | 46 } |
42 break; | 47 break; |
43 case A52_CHANNEL: | 48 case A52_CHANNEL: |
44 case A52_STEREO: | 49 case A52_STEREO: |
45 case A52_DOLBY: | 50 case A52_DOLBY: |
51 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it | |
52 #ifdef HAVE_SSE | |
53 asm volatile( | |
54 "movl $-1024, %%esi \n\t" | |
55 "1: \n\t" | |
56 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
57 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
58 "movq %%mm0, %%mm1 \n\t" | |
59 "punpcklwd %%mm2, %%mm0 \n\t" | |
60 "punpckhwd %%mm2, %%mm1 \n\t" | |
61 "movq %%mm0, (%0, %%esi) \n\t" | |
62 "movq %%mm1, 8(%0, %%esi) \n\t" | |
63 "addl $16, %%esi \n\t" | |
64 " jnz 1b \n\t" | |
65 "emms \n\t" | |
66 :: "r" (s16+512), "r" (f+256) | |
67 :"%esi", "memory" | |
68 );*/ | |
69 #ifdef HAVE_MMX | |
70 asm volatile( | |
71 "movl $-1024, %%esi \n\t" | |
72 "movq magicF2W, %%mm7 \n\t" | |
73 "1: \n\t" | |
74 "movq (%1, %%esi), %%mm0 \n\t" | |
75 "movq 8(%1, %%esi), %%mm1 \n\t" | |
76 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
77 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
78 "psubd %%mm7, %%mm0 \n\t" | |
79 "psubd %%mm7, %%mm1 \n\t" | |
80 "psubd %%mm7, %%mm2 \n\t" | |
81 "psubd %%mm7, %%mm3 \n\t" | |
82 "packssdw %%mm1, %%mm0 \n\t" | |
83 "packssdw %%mm3, %%mm2 \n\t" | |
84 "movq %%mm0, %%mm1 \n\t" | |
85 "punpcklwd %%mm2, %%mm0 \n\t" | |
86 "punpckhwd %%mm2, %%mm1 \n\t" | |
87 "movq %%mm0, (%0, %%esi) \n\t" | |
88 "movq %%mm1, 8(%0, %%esi) \n\t" | |
89 "addl $16, %%esi \n\t" | |
90 " jnz 1b \n\t" | |
91 "emms \n\t" | |
92 :: "r" (s16+512), "r" (f+256) | |
93 :"%esi", "memory" | |
94 ); | |
95 #else | |
46 for (i = 0; i < 256; i++) { | 96 for (i = 0; i < 256; i++) { |
47 s16[2*i] = convert (f[i]); | 97 s16[2*i] = convert (f[i]); |
48 s16[2*i+1] = convert (f[i+256]); | 98 s16[2*i+1] = convert (f[i+256]); |
49 } | 99 } |
100 #endif | |
50 break; | 101 break; |
51 case A52_3F: | 102 case A52_3F: |
52 for (i = 0; i < 256; i++) { | 103 for (i = 0; i < 256; i++) { |
53 s16[5*i] = convert (f[i]); | 104 s16[5*i] = convert (f[i]); |
54 s16[5*i+1] = convert (f[i+512]); | 105 s16[5*i+1] = convert (f[i+512]); |