annotate liba52/srfftp_3dnow.h @ 23572:a00685941686

demux_mkv very long seek fix The seek code searching for the closest position in the index used "int64_t min_diff=0xFFFFFFFL" as the initial "further from the goal than any real alternative" value. The unit is milliseconds so seeks more than about 75 hours past the end of the file would fail to recognize the last index position as the best match. This was triggered in practice by chapter seek code which apparently uses a seek of 1000000000 seconds forward to mean "seek to the end". The practical effect was that trying to seek to the next chapter in a file without chapters made MPlayer block until it finished reading the file from the current position to the end. Fixed by increasing the initial value from FFFFFFF to FFFFFFFFFFFFFFF.
author uau
date Wed, 20 Jun 2007 18:19:03 +0000
parents 3c53cbf53e7e
children b5a46071062a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
1 /*
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
2 * srfftp.h
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
3 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
4 * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
5 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
6 * 64 and 128 point split radix fft for ac3dec
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
7 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
8 * The algorithm is described in the book:
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
9 * "Computational Frameworks of the Fast Fourier Transform".
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
10 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
11 * The ideas and the organization of code borrowed from djbfft written by
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
12 * D. J. Bernstein <djb@cr.yp.to>. djbfft can be found at
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
13 * http://cr.yp.to/djbfft.html.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
14 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
15 * srfftp.h is free software; you can redistribute it and/or modify
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
16 * it under the terms of the GNU General Public License as published by
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
17 * the Free Software Foundation; either version 2, or (at your option)
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
18 * any later version.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
19 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
20 * srfftp.h is distributed in the hope that it will be useful,
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
23 * GNU General Public License for more details.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
24 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
25 * You should have received a copy of the GNU General Public License
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
26 * along with GNU Make; see the file COPYING. If not, write to
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
27 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
28 *
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
29 * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
30 * by Nick Kurshev <nickols_k@mail.ru>
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
31 */
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
32
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
33 #ifndef SRFFTP_3DNOW_H__
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
34 #define SRFFTP_3DNOW_H__
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
35
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
36 typedef struct /* Pair of integer words. NOTE(review): presumably the type of the 64-bit constants loaded into mm6/mm7 by TRANS_FILL_MM6_MM7_3DNOW - confirm at the definition site. */
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
37 {
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
38 unsigned long val[2]; /* NOTE(review): the pair is 8 bytes only where unsigned long is 32 bits wide - confirm for the target ABI */
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
39 }i_cmplx_t;
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
40
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
41 #define TRANS_FILL_MM6_MM7_3DNOW()/* Preload mm6 and mm7 with the two constants that the TRANS* macros below pxor against; must run before them. */\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
42 __asm__ __volatile__(\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
43 "movq %1, %%mm7\n\t"/* mm7 = x_minus_plus_3dnow (operand %1) */\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
44 "movq %0, %%mm6\n\t"/* mm6 = x_plus_minus_3dnow (operand %0) */\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
45 ::"m"(x_plus_minus_3dnow),\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
46 "m"(x_minus_plus_3dnow)\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
47 :"memory"); /* only "memory" is declared; mm6/mm7 are written but not listed as clobbers */
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
48
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
49 #ifdef HAVE_3DNOWEX
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
50 #define PSWAP_MM(mm_base,mm_hlp) "pswapd "mm_base","mm_base"\n\t"/* swap the two 32-bit halves of mm_base in one instruction; mm_hlp is unused in this variant */
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
51 #else
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
52 #define PSWAP_MM(mm_base,mm_hlp)/* same swap without pswapd; overwrites mm_hlp as scratch */\
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
53 "movq "mm_base","mm_hlp"\n\t"/* hlp = lo | hi (copy of base) */\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
54 "psrlq $32, "mm_base"\n\t"/* base = hi | 0 (high dword shifted down) */\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
55 "punpckldq "mm_hlp","mm_base"\n\t"/* base = hi | lo (low dword of hlp becomes the high dword) */
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
56 #endif
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
57 #ifdef HAVE_3DNOWEX
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
58 #define PFNACC_MM(mm_base,mm_hlp) "pfnacc "mm_base","mm_base"\n\t"/* base = (lo - hi) in both halves; mm_hlp is unused in this variant */
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
59 #else
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
60 #define PFNACC_MM(mm_base,mm_hlp)/* emulation without pfnacc; overwrites mm_hlp. NOTE(review): the high half ends up hi - hi here rather than lo - hi; the only user visible in this header (TRANS_3DNOW) overwrites the high half right afterwards - confirm no other user reads it. */\
1116
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
61 "movq "mm_base","mm_hlp"\n\t"/* hlp = lo | hi (copy of base) */\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
62 "psrlq $32,"mm_hlp"\n\t"/* hlp = hi | 0 */\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
63 "punpckldq "mm_hlp","mm_hlp"\n\t"/* hlp = hi | hi */\
faa68f4c03be unstable gcc-3.0 warning surpressing
nickols_k
parents: 1068
diff changeset
64 "pfsub "mm_hlp","mm_base"\n\t"/* base = (lo - hi) | (hi - hi) */
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
65 #endif
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
66
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
67 #define TRANSZERO_3DNOW(A0,A4,A8,A12) /* Updates A0, A4, A8 and A12 in place from wTB[0] and wTB[k*2]. wTB and k are not parameters: they must be in scope where the macro is expanded. mm6/mm7 are read without being loaded here (see TRANS_FILL_MM6_MM7_3DNOW). */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
68 { \
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
69 __asm__ __volatile__(\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
70 "movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
71 "movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
72 "movq %%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
73 "pfadd %%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
74 "pxor %%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
75 "pxor %%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
76 "pfadd %%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
77 "movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
78 PSWAP_MM("%%mm4","%%mm2")/* mm4 = v (halves swapped into re | im order; mm2 is scratch)*/\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
79 "movq %6, %%mm0\n\t" /* a1 = A0;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
80 "movq %7, %%mm2\n\t" /* a1 = A4;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
81 "movq %%mm0, %%mm1\n\t"/* mm1 = copy of old A0 */\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
82 "movq %%mm2, %%mm3\n\t"/* mm3 = copy of old A4 */\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
83 "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
84 "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
85 "movq %%mm0, %0\n\t"/* store A0 */\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
86 "pfsub %%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
87 "movq %%mm2, %3\n\t"/* store A12 */\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
88 "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
89 "movq %%mm1, %1\n\t"/* store A8 */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
90 "movq %%mm3, %2"/* store A4 */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
91 :"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
1068
cda8756ac10f Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents: 1053
diff changeset
92 :"m"(wTB[0]), "m"(wTB[k*2]), "m"(A0), "m"(A4)\
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
93 :"memory");/* mm0-mm5 are written but not listed as clobbers */\
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
94 }
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
95
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
96 #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)/* Updates A2, A6, A10 and A14 in place from wTB[2], wTB[6] and HSQRT2_3DNOW, none of which are parameters: they must be in scope where the macro is expanded. mm6/mm7 are read without being loaded here (see TRANS_FILL_MM6_MM7_3DNOW). */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
97 {\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
98 __asm__ __volatile__(\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
99 "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
100 "movq %%mm0, %%mm1\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
101 "pxor %%mm7, %%mm1\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
102 "pfacc %%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
103 "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
104 "movq %%mm1, %%mm2\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
105 "pxor %%mm7, %%mm1\n\t"\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
106 "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
107 "movq %%mm1, %%mm2\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
108 "pxor %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
109 "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
110 "pfadd %%mm2, %%mm3\n\t"\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
111 PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
112 "pxor %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
113 "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
890
5374e3ecb8a5 Improvements
nickols_k
parents: 886
diff changeset
114 "movq %8, %%mm2\n\t"/* mm2 = HSQRT2_3DNOW (operand %8) */\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
115 "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
116 "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
117 "movq %6, %%mm1\n\t" /* a1 = A2;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
118 "movq %7, %%mm5\n\t" /* a1 = A6;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
119 "movq %%mm1, %%mm2\n\t"/* mm2 = copy of old A2 */\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
120 "movq %%mm3, %%mm4\n\t"/* mm4 = copy of v */\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
121 "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
122 "pxor %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
123 "pfsub %%mm0, %%mm2\n\t" /*A10 = a1 - u;*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
124 "pxor %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
125 "movq %%mm1, %0\n\t"/* store A2 */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
126 "movq %%mm2, %1\n\t"/* store A10 */\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
127 "movq %%mm5, %%mm2\n\t"/* mm2 = copy of old A6 */\
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
128 "pfadd %%mm4, %%mm5\n\t"/*A6.im = a1.im - v.im;*/\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
129 "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
130 "movq %%mm5, %2\n\t"/* store A6 */\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
131 "movq %%mm2, %3"/* store A14 */\
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
132 :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
1068
cda8756ac10f Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents: 1053
diff changeset
133 :"m"(wTB[2]), "m"(wTB[6]), "m"(A2), "m"(A6), "m"(HSQRT2_3DNOW)\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
134 :"memory");/* mm0-mm5 are written but not listed as clobbers */\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
135 }
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
136
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
137 #define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)/* Two asm statements: the first computes from WT, D, WB and D3 and leaves its results in mm4 and mm5 (it has no output operands); the second combines mm4/mm5 with A1 and A5 and stores A1, A5, A9 and A13. mm6/mm7 are read without being loaded here (see TRANS_FILL_MM6_MM7_3DNOW). */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
138 { \
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
139 __asm__ __volatile__(\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
140 "movq %1, %%mm4\n\t"/* mm4 = D (operand %1) */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
141 "movq %%mm4, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
142 "punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
143 "punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
144 "movq %0, %%mm0\n\t"/* mm0 = WT (operand %0) */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
145 "pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
146 "pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
147 PSWAP_MM("%%mm5","%%mm3")\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
148 "pxor %%mm7, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
149 "pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
150 "movq %3, %%mm1\n\t"/* mm1 = D3 (operand %3) */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
151 "movq %2, %%mm0\n\t"/* mm0 = WB (operand %2) */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
152 PSWAP_MM("%%mm1","%%mm3")\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
153 "movq %%mm0, %%mm2\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
154 "pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
155 "pfmul %3, %%mm2\n\t"/* mm2 = v*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
156 PFNACC_MM("%%mm2","%%mm3")/* only the low half of mm2 is used: the punpckldq below replaces the high half */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
157 "pfacc %%mm0, %%mm0\n\t"\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
158 "movq %%mm4, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
159 "punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
160 "pxor %%mm6, %%mm5\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
161 "movq %%mm2, %%mm3\n\t"\
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 1116
diff changeset
162 "pxor %%mm7, %%mm3\n\t"\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
163 "pfadd %%mm3, %%mm5\n\t"\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
164 PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
165 "pfadd %%mm2, %%mm4\n\t"/* final value of mm4, consumed as u by the second asm statement */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
166 :\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
167 :"m"(WT), "m"(D), "m"(WB), "m"(D3)\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
168 :"memory");/* NOTE(review): mm4/mm5 carry results into the next asm statement with no operand or clobber describing them - relies on the compiler not touching MMX registers in between */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
169 __asm__ __volatile__(\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
170 "movq %4, %%mm0\n\t"/* a1 = A1*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
171 "movq %5, %%mm2\n\t"/* a1 = A5*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
172 "movq %%mm0, %%mm1\n\t"/* mm1 = copy of old A1 */\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
173 "movq %%mm2, %%mm3\n\t"/* mm3 = copy of old A5 */\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
174 "pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
175 "pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
176 "movq %%mm0, %0\n\t"/* store A1 */\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
177 "pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
1053
970fbd433564 Last minute improvements before release
nickols_k
parents: 926
diff changeset
178 "movq %%mm2, %2\n\t"/* store A5 */\
926
d44a690543ac Better insns scheduling and moving out local variables
nickols_k
parents: 920
diff changeset
179 "pfadd %%mm5, %%mm3\n\t"/*A13 = a1 + v*/\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
180 "movq %%mm1, %1\n\t"/* store A9 */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
181 "movq %%mm3, %3"/* store A13 */\
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
182 :"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
1068
cda8756ac10f Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents: 1053
diff changeset
183 :"m"(A1), "m"(A5)\
920
deeaad5bf1d7 libac3 now is full 3dnow! optimized
nickols_k
parents: 890
diff changeset
184 :"memory");\
886
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
185 }
cb432deedb92 Improvements
nickols_k
parents: 885
diff changeset
186
885
cec1562ccf8a Improvements
nickols_k
parents:
diff changeset
187 #endif