annotate liba52/srfftp_3dnow.h @ 28992:947ef23ba798

Test whether create_vdp_decoder() might succeed by calling it from config() with a small value for max_reference_frames. This does not make automatic recovery by falling back to the software decoder possible, but it lets MPlayer fail more gracefully on (actually existing) buggy hardware that does not support certain H264 widths when using hardware-accelerated decoding (784, 864, 944, 1024, 1808, 1888 pixels on NVIDIA G98), and when the user tries to hardware-decode more samples at the same time than are supported. Might break playback of H264 Intra-Only samples on hardware with very little video memory.
author cehoyos
date Sat, 21 Mar 2009 20:11:05 +0000
parents 31287e75b5d8
children e83eef58b30a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
1 /*
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
2 * srfftp.h
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
3 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
4 * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
5 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
6 * 64 and 128 point split radix fft for ac3dec
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
7 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
8 * The algorithm is described in the book:
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
9 * "Computational Frameworks of the Fast Fourier Transform".
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
10 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
11 * The ideas and the organization of code borrowed from djbfft written by
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
12 * D. J. Bernstein <djb@cr.yp.to>. djbfft can be found at
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
13 * http://cr.yp.to/djbfft.html.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
14 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
15 * srfftp.h is free software; you can redistribute it and/or modify
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
16 * it under the terms of the GNU General Public License as published by
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
17 * the Free Software Foundation; either version 2, or (at your option)
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
18 * any later version.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
19 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
20 * srfftp.h is distributed in the hope that it will be useful,
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
23 * GNU General Public License for more details.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
24 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
25 * You should have received a copy of the GNU General Public License
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
26 * along with GNU Make; see the file COPYING. If not, write to
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
27 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
28 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
29 * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
30 * by Nick Kurshev <nickols_k@mail.ru>
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
31 */
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
32
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
33 #ifndef SRFFTP_3DNOW_H__
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
34 #define SRFFTP_3DNOW_H__
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
35
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
36 typedef struct
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
37 {
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
38 unsigned long val[2];
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
39 }i_cmplx_t;
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
40
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
41 #define TRANS_FILL_MM6_MM7_3DNOW()\
27757
b5a46071062a Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents: 1173
diff changeset
42 __asm__ volatile(\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
43 "movq %1, %%mm7\n\t"\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
44 "movq %0, %%mm6\n\t"\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
45 ::"m"(x_plus_minus_3dnow),\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
46 "m"(x_minus_plus_3dnow)\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
47 :"memory");
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
48
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
49 #if HAVE_AMD3DNOWEXT
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
50 #define PSWAP_MM(mm_base,mm_hlp) "pswapd "mm_base","mm_base"\n\t"
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
51 #else
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
52 #define PSWAP_MM(mm_base,mm_hlp)\
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
53 "movq "mm_base","mm_hlp"\n\t"\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
54 "psrlq $32, "mm_base"\n\t"\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
55 "punpckldq "mm_hlp","mm_base"\n\t"
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
56 #endif
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
57 #if HAVE_AMD3DNOWEXT
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
58 #define PFNACC_MM(mm_base,mm_hlp) "pfnacc "mm_base","mm_base"\n\t"
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
59 #else
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
60 #define PFNACC_MM(mm_base,mm_hlp)\
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
61 "movq "mm_base","mm_hlp"\n\t"\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
62 "psrlq $32,"mm_hlp"\n\t"\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
63 "punpckldq "mm_hlp","mm_hlp"\n\t"\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
64 "pfsub "mm_hlp","mm_base"\n\t"
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
65 #endif
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
66
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
67 #define TRANSZERO_3DNOW(A0,A4,A8,A12) \
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
68 { \
27757
b5a46071062a Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents: 1173
diff changeset
69 __asm__ volatile(\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
70 "movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
71 "movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
72 "movq %%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
73 "pfadd %%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
74 "pxor %%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
75 "pxor %%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
76 "pfadd %%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
77 "movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
78 PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
79 "movq %6, %%mm0\n\t" /* a1 = A0;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
80 "movq %7, %%mm2\n\t" /* a1 = A4;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
81 "movq %%mm0, %%mm1\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
82 "movq %%mm2, %%mm3\n\t"\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
83 "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
84 "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
85 "movq %%mm0, %0\n\t"\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
86 "pfsub %%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
87 "movq %%mm2, %3\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
88 "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
89 "movq %%mm1, %1\n\t"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
90 "movq %%mm3, %2"\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
91 :"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
1068
cda8756ac10f Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents: 1053
diff changeset
92 :"m"(wTB[0]), "m"(wTB[k*2]), "m"(A0), "m"(A4)\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
93 :"memory");\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
94 }
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
95
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
96 #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
97 {\
27757
b5a46071062a Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents: 1173
diff changeset
98 __asm__ volatile(\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
99 "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
100 "movq %%mm0, %%mm1\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
101 "pxor %%mm7, %%mm1\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
102 "pfacc %%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
103 "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
104 "movq %%mm1, %%mm2\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
105 "pxor %%mm7, %%mm1\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
106 "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
107 "movq %%mm1, %%mm2\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
108 "pxor %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
109 "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
110 "pfadd %%mm2, %%mm3\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
111 PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
112 "pxor %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
113 "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
114 "movq %8, %%mm2\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
115 "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
116 "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
117 "movq %6, %%mm1\n\t" /* a1 = A2;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
118 "movq %7, %%mm5\n\t" /* a1 = A6;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
119 "movq %%mm1, %%mm2\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
120 "movq %%mm3, %%mm4\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
121 "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
122 "pxor %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
123 "pfsub %%mm0, %%mm2\n\t" /*A10 = a1 - u;*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
124 "pxor %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
125 "movq %%mm1, %0\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
126 "movq %%mm2, %1\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
127 "movq %%mm5, %%mm2\n\t"\
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
128 "pfadd %%mm4, %%mm5\n\t"/*A6.im = a1.im - v.im;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
129 "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
130 "movq %%mm5, %2\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
131 "movq %%mm2, %3"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
132 :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
1068
cda8756ac10f Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents: 1053
diff changeset
133 :"m"(wTB[2]), "m"(wTB[6]), "m"(A2), "m"(A6), "m"(HSQRT2_3DNOW)\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
134 :"memory");\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
135 }
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
136
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
137 #define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
138 { \
27757
b5a46071062a Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents: 1173
diff changeset
139 __asm__ volatile(\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
140 "movq %1, %%mm4\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
141 "movq %%mm4, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
142 "punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
143 "punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
144 "movq %0, %%mm0\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
145 "pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
146 "pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
147 PSWAP_MM("%%mm5","%%mm3")\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
148 "pxor %%mm7, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
149 "pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
150 "movq %3, %%mm1\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
151 "movq %2, %%mm0\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
152 PSWAP_MM("%%mm1","%%mm3")\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
153 "movq %%mm0, %%mm2\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
154 "pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
155 "pfmul %3, %%mm2\n\t"/* mm2 = v*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
156 PFNACC_MM("%%mm2","%%mm3")\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
157 "pfacc %%mm0, %%mm0\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
158 "movq %%mm4, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
159 "punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
160 "pxor %%mm6, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
161 "movq %%mm2, %%mm3\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
162 "pxor %%mm7, %%mm3\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
163 "pfadd %%mm3, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
164 PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
165 "pfadd %%mm2, %%mm4\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
166 :\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
167 :"m"(WT), "m"(D), "m"(WB), "m"(D3)\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
168 :"memory");\
27757
b5a46071062a Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents: 1173
diff changeset
169 __asm__ volatile(\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
170 "movq %4, %%mm0\n\t"/* a1 = A1*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
171 "movq %5, %%mm2\n\t"/* a1 = A5*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
172 "movq %%mm0, %%mm1\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
173 "movq %%mm2, %%mm3\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
174 "pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
175 "pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
176 "movq %%mm0, %0\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
177 "pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
178 "movq %%mm2, %2\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
179 "pfadd %%mm5, %%mm3\n\t"/*A13 = a1 + v*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
180 "movq %%mm1, %1\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
181 "movq %%mm3, %3"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
182 :"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
1068
cda8756ac10f Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents: 1053
diff changeset
183 :"m"(A1), "m"(A5)\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
184 :"memory");\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
185 }
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
186
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
187 #endif