comparison liba52/resample_mmx.c @ 3567:9e1e88b3ca18

MMX optimization
author michael
date Mon, 17 Dec 2001 19:33:19 +0000
parents 21d65a4ae3c9
children d14494d84c29
comparison
equal deleted inserted replaced
3566:92b3028b10f3 3567:9e1e88b3ca18
7 7
8 // a52_resample() should do the conversion. 8 // a52_resample() should do the conversion.
9 9
10 #include <inttypes.h> 10 #include <inttypes.h>
11 #include "a52.h" 11 #include "a52.h"
12 #include "../config.h"
13
14 #ifdef HAVE_MMX
15 static uint64_t __attribute__((aligned(16))) magicF2W= 0x43c0000043c00000LL;
16 #endif
12 17
13 static inline int16_t convert (int32_t i) 18 static inline int16_t convert (int32_t i)
14 { 19 {
15 if (i > 0x43c07fff) 20 if (i > 0x43c07fff)
16 return 32767; 21 return 32767;
41 } 46 }
42 break; 47 break;
43 case A52_CHANNEL: 48 case A52_CHANNEL:
44 case A52_STEREO: 49 case A52_STEREO:
45 case A52_DOLBY: 50 case A52_DOLBY:
51 /* Benchmark scores are 0.3% better with SSE, but we would need to set bias=0 and premultiply it
52 #ifdef HAVE_SSE
53 asm volatile(
54 "movl $-1024, %%esi \n\t"
55 "1: \n\t"
56 "cvtps2pi (%1, %%esi), %%mm0 \n\t"
57 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
58 "movq %%mm0, %%mm1 \n\t"
59 "punpcklwd %%mm2, %%mm0 \n\t"
60 "punpckhwd %%mm2, %%mm1 \n\t"
61 "movq %%mm0, (%0, %%esi) \n\t"
62 "movq %%mm1, 8(%0, %%esi) \n\t"
63 "addl $16, %%esi \n\t"
64 " jnz 1b \n\t"
65 "emms \n\t"
66 :: "r" (s16+512), "r" (f+256)
67 :"%esi", "memory"
68 );*/
69 #ifdef HAVE_MMX
70 asm volatile(
71 "movl $-1024, %%esi \n\t"
72 "movq magicF2W, %%mm7 \n\t"
73 "1: \n\t"
74 "movq (%1, %%esi), %%mm0 \n\t"
75 "movq 8(%1, %%esi), %%mm1 \n\t"
76 "movq 1024(%1, %%esi), %%mm2 \n\t"
77 "movq 1032(%1, %%esi), %%mm3 \n\t"
78 "psubd %%mm7, %%mm0 \n\t"
79 "psubd %%mm7, %%mm1 \n\t"
80 "psubd %%mm7, %%mm2 \n\t"
81 "psubd %%mm7, %%mm3 \n\t"
82 "packssdw %%mm1, %%mm0 \n\t"
83 "packssdw %%mm3, %%mm2 \n\t"
84 "movq %%mm0, %%mm1 \n\t"
85 "punpcklwd %%mm2, %%mm0 \n\t"
86 "punpckhwd %%mm2, %%mm1 \n\t"
87 "movq %%mm0, (%0, %%esi) \n\t"
88 "movq %%mm1, 8(%0, %%esi) \n\t"
89 "addl $16, %%esi \n\t"
90 " jnz 1b \n\t"
91 "emms \n\t"
92 :: "r" (s16+512), "r" (f+256)
93 :"%esi", "memory"
94 );
95 #else
46 for (i = 0; i < 256; i++) { 96 for (i = 0; i < 256; i++) {
47 s16[2*i] = convert (f[i]); 97 s16[2*i] = convert (f[i]);
48 s16[2*i+1] = convert (f[i+256]); 98 s16[2*i+1] = convert (f[i+256]);
49 } 99 }
100 #endif
50 break; 101 break;
51 case A52_3F: 102 case A52_3F:
52 for (i = 0; i < 256; i++) { 103 for (i = 0; i < 256; i++) {
53 s16[5*i] = convert (f[i]); 104 s16[5*i] = convert (f[i]);
54 s16[5*i+1] = convert (f[i+512]); 105 s16[5*i+1] = convert (f[i+512]);