annotate liba52/srfftp_3dnow.h @ 1048:48cf588e2c90

uj aocontrol valtozok
author al3x
date Thu, 07 Jun 2001 13:06:30 +0000
parents d44a690543ac
children 970fbd433564
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
1 /*
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
2 * srfftp.h
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
3 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
4 * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
5 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
6 * 64 and 128 point split radix fft for ac3dec
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
7 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
8 * The algorithm is described in the book:
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
9 * "Computational Frameworks of the Fast Fourier Transform".
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
10 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
11 * The ideas and the organization of the code are borrowed from djbfft, written by
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
12 * D. J. Bernstein <djb@cr.yp.to>. djbfft can be found at
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
13 * http://cr.yp.to/djbfft.html.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
14 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
15 * srfftp.h is free software; you can redistribute it and/or modify
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
16 * it under the terms of the GNU General Public License as published by
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
17 * the Free Software Foundation; either version 2, or (at your option)
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
18 * any later version.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
19 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
20 * srfftp.h is distributed in the hope that it will be useful,
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
23 * GNU General Public License for more details.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
24 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
25 * You should have received a copy of the GNU General Public License
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
26 * along with GNU Make; see the file COPYING. If not, write to
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
27 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
28 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
29 * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
30 * by Nick Kurshev <nickols_k@mail.ru>
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
31 */
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
32
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
33 #ifndef SRFFTP_3DNOW_H__
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
34 #define SRFFTP_3DNOW_H__
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
35
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
36 static complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
37
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
38 #ifdef HAVE_3DNOWEX
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
39 #define TRANS_FILL_MM6_MM7_3DNOW()\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
40 __asm__ __volatile__(\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
41 "movl $-1, %%eax\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
42 "movd %%eax, %%mm7\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
43 "negl %%eax\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
44 "movd %%eax, %%mm6\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
45 "punpckldq %%mm6, %%mm7\n\t" /* -1.0 | 1.0 */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
46 "pi2fd %%mm7, %%mm7\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
47 "pswapd %%mm7, %%mm6\n\t"/* 1.0 | -1.0 */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
48 :::"eax","memory");
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
49 #else
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
50 #define TRANS_FILL_MM6_MM7_3DNOW()\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
51 __asm__ __volatile__(\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
52 "movl $-1, %%eax\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
53 "movd %%eax, %%mm7\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
54 "negl %%eax\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
55 "movd %%eax, %%mm6\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
56 "punpckldq %%mm6, %%mm7\n\t" /* -1.0 | 1.0 */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
57 "punpckldq %%mm7, %%mm6\n\t" /* 1.0 | -1.0 */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
58 "pi2fd %%mm7, %%mm7\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
59 "pi2fd %%mm6, %%mm6\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
60 :::"eax","memory");
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
61 #endif
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
62
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
63 #ifdef HAVE_3DNOWEX
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
64 #define PSWAP_MM(mm_base,mm_hlp) "pswapd "##mm_base","##mm_base"\n\t"
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
65 #else
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
66 #define PSWAP_MM(mm_base,mm_hlp)\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
67 "movq "##mm_base","##mm_hlp"\n\t"\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
68 "psrlq $32, "##mm_base"\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
69 "punpckldq "##mm_hlp","##mm_base"\n\t"
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
70 #endif
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
71 #ifdef HAVE_3DNOWEX
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
72 #define PFNACC_MM(mm_base,mm_hlp) "pfnacc "##mm_base","##mm_base"\n\t"
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
73 #else
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
74 #define PFNACC_MM(mm_base,mm_hlp)\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
75 "movq "##mm_base","##mm_hlp"\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
76 "psrlq $32,"##mm_hlp"\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
77 "punpckldq "##mm_hlp","##mm_hlp"\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
78 "pfsub "##mm_hlp","##mm_base"\n\t"
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
79 #endif
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
80
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
81 #define TRANSZERO_3DNOW(A0,A4,A8,A12) \
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
82 { \
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
83 __asm__ __volatile__(\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
84 "movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
85 "movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
86 "movq %%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
87 "pfadd %%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
88 "pfmul %%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
89 "pfmul %%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
90 "pfadd %%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
91 "movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
92 PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
93 "movq %6, %%mm0\n\t" /* a1 = A0;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
94 "movq %7, %%mm2\n\t" /* a1 = A4;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
95 "movq %%mm0, %%mm1\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
96 "movq %%mm2, %%mm3\n\t"\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
97 "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
98 "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
99 "pfsub %%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
100 "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
101 "movq %%mm0, %0\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
102 "movq %%mm2, %3\n\t"\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
103 "movq %%mm1, %1\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
104 "movq %%mm3, %2"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
105 :"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
106 :"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
107 :"memory");\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
108 }
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
109
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
110 #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
111 {\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
112 __asm__ __volatile__(\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
113 "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
114 "movq %%mm0, %%mm1\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
115 "pfmul %%mm7, %%mm1\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
116 "pfacc %%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
117 "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
118 "movq %%mm1, %%mm2\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
119 "pfmul %%mm7, %%mm1\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
120 "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
121 "movq %%mm1, %%mm2\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
122 "pfmul %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
123 "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
124 "pfadd %%mm2, %%mm3\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
125 PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
126 "pfmul %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
127 "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
128 "movq %8, %%mm2\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
129 "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
130 "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
131 "movq %6, %%mm1\n\t" /* a1 = A2;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
132 "movq %7, %%mm5\n\t" /* a1 = A6;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
133 "movq %%mm1, %%mm2\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
134 "movq %%mm3, %%mm4\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
135 "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
136 "pfmul %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
137 "pfsub %%mm0, %%mm2\n\t" /*A10 = a1 - u;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
138 "pfmul %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
139 "movq %%mm1, %0\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
140 "movq %%mm2, %1\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
141 "movq %%mm5, %%mm2\n\t"\
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
142 "pfadd %%mm4, %%mm5\n\t"/*A6.im = a1.im - v.im;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
143 "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
144 "movq %%mm5, %2\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
145 "movq %%mm2, %3"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
146 :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
147 :"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
148 :"memory");\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
149 }
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
150
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
151 #define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
152 { \
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
153 __asm__ __volatile__(\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
154 "movq %1, %%mm4\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
155 "movq %%mm4, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
156 "punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
157 "punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
158 "movq %0, %%mm0\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
159 "pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
160 "pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
161 PSWAP_MM("%%mm5","%%mm3")\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
162 "pfmul %%mm7, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
163 "pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
164 "movq %3, %%mm1\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
165 "movq %2, %%mm0\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
166 PSWAP_MM("%%mm1","%%mm3")\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
167 "movq %%mm0, %%mm2\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
168 "pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
169 "pfmul %3, %%mm2\n\t"/* mm2 = v*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
170 PFNACC_MM("%%mm2","%%mm3")\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
171 "pfacc %%mm0, %%mm0\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
172 "movq %%mm4, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
173 "punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
174 "pfmul %%mm6, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
175 "movq %%mm2, %%mm3\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
176 "pfmul %%mm7, %%mm3\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
177 "pfadd %%mm3, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
178 PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
179 "pfadd %%mm2, %%mm4\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
180 :\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
181 :"m"(WT), "m"(D), "m"(WB), "m"(D3)\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
182 :"memory");\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
183 __asm__ __volatile__(\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
184 "movq %4, %%mm0\n\t"/* a1 = A1*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
185 "movq %5, %%mm2\n\t"/* a1 = A5*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
186 "movq %%mm0, %%mm1\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
187 "movq %%mm2, %%mm3\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
188 "pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
189 "pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
190 "pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
191 "pfadd %%mm5, %%mm3\n\t"/*A13 = a1 + v*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
192 "movq %%mm0, %0\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
193 "movq %%mm1, %1\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
194 "movq %%mm2, %2\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
195 "movq %%mm3, %3"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
196 :"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
197 :"0"(A1), "2"(A5)\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
198 :"memory");\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
199 }
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
200
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
201 #endif