annotate x86/fft_mmx.asm @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 67e7e49058c2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1 ;******************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2 ;* FFT transform with SSE/3DNow optimizations
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
3 ;* Copyright (c) 2008 Loren Merritt
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
4 ;*
12188
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 11931
diff changeset
5 ;* This algorithm (though not any of the implementation details) is
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 11931
diff changeset
6 ;* based on libdjbfft by D. J. Bernstein.
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 11931
diff changeset
7 ;*
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
8 ;* This file is part of FFmpeg.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
9 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
10 ;* FFmpeg is free software; you can redistribute it and/or
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
11 ;* modify it under the terms of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
12 ;* License as published by the Free Software Foundation; either
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
13 ;* version 2.1 of the License, or (at your option) any later version.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
14 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
15 ;* FFmpeg is distributed in the hope that it will be useful,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
18 ;* Lesser General Public License for more details.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
19 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
20 ;* You should have received a copy of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
21 ;* License along with FFmpeg; if not, write to the Free Software
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
23 ;******************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
24
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
25 ; These functions are not individually interchangeable with the C versions.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
26 ; While C takes arrays of FFTComplex, SSE/3DNow leave intermediate results
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
27 ; in blocks as convenient to the vector size.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
28 ; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
29
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
30 %include "x86inc.asm"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
31
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
32 %ifdef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
33 %define pointer resq
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
34 %else
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
35 %define pointer resd
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
36 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
37
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
38 struc FFTContext ; field offsets used by the asm; presumably mirrors the C-side FFTContext layout -- TODO confirm against the C header
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
39 .nbits: resd 1 ; one dword
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
40 .reverse: resd 1 ; one dword
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
41 .revtab: pointer 1 ; pointer-sized slot (`pointer` is resq when ARCH_X86_64 is defined, resd otherwise)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
42 .tmpbuf: pointer 1 ; pointer-sized slot
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
43 .mdctsize: resd 1 ; one dword
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
44 .mdctbits: resd 1 ; one dword
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
45 .tcos: pointer 1 ; pointer-sized slot
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
46 .tsin: pointer 1 ; pointer-sized slot
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
47 endstruc
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
48
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
49 SECTION_RODATA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
50
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
51 %define M_SQRT1_2 0.70710678118654752440
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
52 ps_root2: times 4 dd M_SQRT1_2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
53 ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
54 ps_m1p1: dd 1<<31, 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
55
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
56 %assign i 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
57 %rep 13
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
58 cextern cos_ %+ i
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
59 %assign i i<<1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
60 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
61
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
62 %ifdef ARCH_X86_64
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
63 %define pointer dq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
64 %else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
65 %define pointer dd
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
66 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
67
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
68 %macro IF0 1+ ; IF0 <line>: empty body, so the line passed as argument is discarded (the "false" half of IF%1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
69 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
70 %macro IF1 1+ ; IF1 <line>: emits the line unchanged; callers write IF%1 so a 0/1 macro parameter selects IF0 or IF1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
71 %1 ; the whole argument: `1+` makes the parameter greedy, so commas inside the instruction are kept
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
72 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
73
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
74 section .text align=16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
75
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
76 %macro T2_3DN 4 ; z0, z1, mem0, mem1 -- out: z0 = mem0 + mem1, z1 = mem0 - mem1 (packed float sum/difference)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
77 mova %1, %3 ; z0 = mem0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
78 mova %2, %1 ; z1 = mem0 (copied from z0, so mem0 is read only once)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
79 pfadd %1, %4 ; z0 = mem0 + mem1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
80 pfsub %2, %4 ; z1 = mem0 - mem1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
81 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
82
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
83 %macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
84 mova %5, %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
85 pfsub %3, %4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
86 pfadd %5, %4 ; {t6,t5}
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
87 pxor %3, [ps_m1p1] ; {t8,t7}
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
88 mova %6, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
89 pswapd %3, %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
90 pfadd %1, %5 ; {r0,i0}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
91 pfsub %6, %5 ; {r2,i2}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
92 mova %4, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
93 pfadd %2, %3 ; {r1,i1}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
94 pfsub %4, %3 ; {r3,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
95 SWAP %3, %6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
96 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
97
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
98 ; in: %1={r0,i0,r1,i1} %2={r2,i2,r3,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
99 ; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
100 %macro T4_SSE 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
101 mova %3, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
102 shufps %1, %2, 0x64 ; {r0,i0,r3,i2}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
103 shufps %3, %2, 0xce ; {r1,i1,r2,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
104 mova %2, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
105 addps %1, %3 ; {t1,t2,t6,t5}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
106 subps %2, %3 ; {t3,t4,t8,t7}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
107 mova %3, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
108 shufps %1, %2, 0x44 ; {t1,t2,t3,t4}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
109 shufps %3, %2, 0xbe ; {t6,t5,t7,t8}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
110 mova %2, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
111 addps %1, %3 ; {r0,i0,r1,i1}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
112 subps %2, %3 ; {r2,i2,r3,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
113 mova %3, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
114 shufps %1, %2, 0x88 ; {r0,r1,r2,r3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
115 shufps %3, %2, 0xdd ; {i0,i1,i2,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
116 SWAP %2, %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
117 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
118
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
119 %macro T8_SSE 6 ; r0,i0,r1,i1,t0,t1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
120 mova %5, %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
121 shufps %3, %4, 0x44 ; {r4,i4,r6,i6}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
122 shufps %5, %4, 0xee ; {r5,i5,r7,i7}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
123 mova %6, %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
124 subps %3, %5 ; {r5,i5,r7,i7}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
125 addps %6, %5 ; {t1,t2,t3,t4}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
126 mova %5, %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
127 shufps %5, %5, 0xb1 ; {i5,r5,i7,r7}
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
128 mulps %3, [ps_root2mppm] ; {-r5,i5,r7,-i7}
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
129 mulps %5, [ps_root2]
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
130 addps %3, %5 ; {t8,t7,ta,t9}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
131 mova %5, %6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
132 shufps %6, %3, 0x36 ; {t3,t2,t9,t8}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
133 shufps %5, %3, 0x9c ; {t1,t4,t7,ta}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
134 mova %3, %6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
135 addps %6, %5 ; {t1,t2,t9,ta}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
136 subps %3, %5 ; {t6,t5,tc,tb}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
137 mova %5, %6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
138 shufps %6, %3, 0xd8 ; {t1,t9,t5,tb}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
139 shufps %5, %3, 0x8d ; {t2,ta,t6,tc}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
140 mova %3, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
141 mova %4, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
142 addps %1, %6 ; {r0,r1,r2,r3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
143 addps %2, %5 ; {i0,i1,i2,i3}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
144 subps %3, %6 ; {r4,r5,r6,r7}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
145 subps %4, %5 ; {i4,i5,i6,i7}
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
146 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
147
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
148 ; scheduled for cpu-bound sizes
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
149 %macro PASS_SMALL 3 ; (to load m4-m7), wre, wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
150 IF%1 mova m4, Z(4)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
151 IF%1 mova m5, Z(5)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
152 mova m0, %2 ; wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
153 mova m2, m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
154 mova m1, %3 ; wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
155 mova m3, m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
156 mulps m2, m0 ; r2*wre
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
157 IF%1 mova m6, Z2(6)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
158 mulps m3, m1 ; i2*wim
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
159 IF%1 mova m7, Z2(7)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
160 mulps m4, m1 ; r2*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
161 mulps m5, m0 ; i2*wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
162 addps m2, m3 ; r2*wre + i2*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
163 mova m3, m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
164 mulps m1, m6 ; r3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
165 subps m5, m4 ; i2*wre - r2*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
166 mova m4, m0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
167 mulps m3, m7 ; i3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
168 mulps m4, m6 ; r3*wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
169 mulps m0, m7 ; i3*wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
170 subps m4, m3 ; r3*wre - i3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
171 mova m3, Z(0)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
172 addps m0, m1 ; i3*wre + r3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
173 mova m1, m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
174 addps m4, m2 ; t5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
175 subps m1, m2 ; t3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
176 subps m3, m4 ; r2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
177 addps m4, Z(0) ; r0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
178 mova m6, Z(2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
179 mova Z(4), m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
180 mova Z(0), m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
181 mova m3, m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
182 subps m5, m0 ; t4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
183 mova m4, m6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
184 subps m6, m5 ; r3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
185 addps m5, m4 ; r1
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
186 mova Z2(6), m6
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
187 mova Z(2), m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
188 mova m2, Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
189 addps m3, m0 ; t6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
190 subps m2, m1 ; i3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
191 mova m7, Z(1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
192 addps m1, Z(3) ; i1
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
193 mova Z2(7), m2
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
194 mova Z(3), m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
195 mova m4, m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
196 subps m7, m3 ; i2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
197 addps m3, m4 ; i0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
198 mova Z(5), m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
199 mova Z(1), m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
200 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
201
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
202 ; scheduled to avoid store->load aliasing
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
203 %macro PASS_BIG 1 ; (!interleave)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
204 mova m4, Z(4) ; r2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
205 mova m5, Z(5) ; i2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
206 mova m2, m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
207 mova m0, [wq] ; wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
208 mova m3, m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
209 mova m1, [wq+o1q] ; wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
210 mulps m2, m0 ; r2*wre
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
211 mova m6, Z2(6) ; r3
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
212 mulps m3, m1 ; i2*wim
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
213 mova m7, Z2(7) ; i3
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
214 mulps m4, m1 ; r2*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
215 mulps m5, m0 ; i2*wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
216 addps m2, m3 ; r2*wre + i2*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
217 mova m3, m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
218 mulps m1, m6 ; r3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
219 subps m5, m4 ; i2*wre - r2*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
220 mova m4, m0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
221 mulps m3, m7 ; i3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
222 mulps m4, m6 ; r3*wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
223 mulps m0, m7 ; i3*wre
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
224 subps m4, m3 ; r3*wre - i3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
225 mova m3, Z(0)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
226 addps m0, m1 ; i3*wre + r3*wim
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
227 mova m1, m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
228 addps m4, m2 ; t5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
229 subps m1, m2 ; t3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
230 subps m3, m4 ; r2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
231 addps m4, Z(0) ; r0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
232 mova m6, Z(2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
233 mova Z(4), m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
234 mova Z(0), m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
235 mova m3, m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
236 subps m5, m0 ; t4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
237 mova m4, m6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
238 subps m6, m5 ; r3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
239 addps m5, m4 ; r1
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
240 IF%1 mova Z2(6), m6
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
241 IF%1 mova Z(2), m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
242 mova m2, Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
243 addps m3, m0 ; t6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
244 subps m2, m1 ; i3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
245 mova m7, Z(1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
246 addps m1, Z(3) ; i1
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
247 IF%1 mova Z2(7), m2
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
248 IF%1 mova Z(3), m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
249 mova m4, m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
250 subps m7, m3 ; i2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
251 addps m3, m4 ; i0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
252 IF%1 mova Z(5), m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
253 IF%1 mova Z(1), m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
254 %if %1==0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
255 mova m4, m5 ; r1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
256 mova m0, m6 ; r3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
257 unpcklps m5, m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
258 unpckhps m4, m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
259 unpcklps m6, m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
260 unpckhps m0, m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
261 mova m1, Z(0)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
262 mova m2, Z(4)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
263 mova Z(2), m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
264 mova Z(3), m4
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
265 mova Z2(6), m6
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
266 mova Z2(7), m0
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
267 mova m5, m1 ; r0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
268 mova m4, m2 ; r2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
269 unpcklps m1, m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
270 unpckhps m5, m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
271 unpcklps m2, m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
272 unpckhps m4, m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
273 mova Z(0), m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
274 mova Z(1), m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
275 mova Z(4), m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
276 mova Z(5), m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
277 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
278 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
279
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
280 %macro PUNPCK 3 ; %1 = a (in/out), %2 = b (in; register or memory), %3 = out: %1 <- low dwords of a,b interleaved, %3 <- high dwords of a,b interleaved
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
281 mova %3, %1 ; copy a first, because the next instruction overwrites %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
282 punpckldq %1, %2 ; %1 = low dwords of a and b, interleaved
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
283 punpckhdq %3, %2 ; %3 = high dwords of a and b, interleaved
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
284 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
285
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
286 INIT_XMM ; x264asm mode switch; NOTE(review): presumably makes m0-m7 the XMM registers and mmsize 16 - confirm in x86inc
10452
c6aa538c0bc3 s/movdqa/movaps/ in sse1 fft. (regression in r20293)
lorenm
parents: 10019
diff changeset
287 %define mova movaps ; force the movaps encoding for aligned moves in the SSE code (per the changeset note: movdqa was a regression here)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
288
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
289 %define Z(x) [r0+mmsize*x] ; x-th mmsize-wide vector of the buffer at r0 (flat, contiguous addressing for the small fixed-size transforms)
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
290 %define Z2(x) [r0+mmsize*x] ; identical to Z(x) here; kept separate so the shared macros can use Z2 for elements 6 and 7, which are addressed differently once Z/Z2 are redefined for the strided passes
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
291
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
292 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
293 fft4_sse: ; in: r0 = buffer; transforms vectors Z(0), Z(1) in place; r0 is not modified. NOTE(review): presumably the 4-point FFT, going by the name
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
294 mova m0, Z(0) ; load both input vectors
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
295 mova m1, Z(1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
296 T4_SSE m0, m1, m2 ; T4_SSE is defined earlier in the file; NOTE(review): third operand (m2) is presumably a scratch register - confirm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
297 mova Z(0), m0 ; write the result back over the input
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
298 mova Z(1), m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
299 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
300
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
301 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
302 fft8_sse: ; in: r0 = buffer; transforms vectors Z(0)..Z(3) in place; r0 is not modified. NOTE(review): presumably the 8-point FFT, going by the name
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
303 mova m0, Z(0) ; first two vectors go through the same T4_SSE step as fft4_sse
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
304 mova m1, Z(1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
305 T4_SSE m0, m1, m2 ; m2 is overwritten by the load below, so whatever T4_SSE leaves in it is discarded
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
306 mova m2, Z(2) ; remaining two input vectors
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
307 mova m3, Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
308 T8_SSE m0, m1, m2, m3, m4, m5 ; T8_SSE is defined earlier in the file; results are taken from m0-m3 below. NOTE(review): m4/m5 presumably scratch - confirm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
309 mova Z(0), m0 ; write all four result vectors back in place
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
310 mova Z(1), m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
311 mova Z(2), m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
312 mova Z(3), m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
313 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
314
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
315 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
316 fft16_sse: ; in: r0 = buffer; works on vectors Z(0)..Z(7); r0 is not modified by the visible code. NOTE(review): presumably the 16-point FFT, going by the name
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
317 mova m0, Z(0) ; Z(0)..Z(3): same T4_SSE + T8_SSE sequence as fft8_sse
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
318 mova m1, Z(1)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
319 T4_SSE m0, m1, m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
320 mova m2, Z(2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
321 mova m3, Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
322 T8_SSE m0, m1, m2, m3, m4, m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
323 mova m4, Z(4) ; begin loading the next two vectors before the first four results are stored
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
324 mova m5, Z(5)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
325 mova Z(0), m0 ; write back the results for Z(0)..Z(3); this frees m0-m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
326 mova Z(1), m1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
327 mova Z(2), m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
328 mova Z(3), m3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
329 T4_SSE m4, m5, m6 ; T4 step on Z(4), Z(5); results stay in m4/m5 (not stored here)
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
330 mova m6, Z2(6) ; last two vectors, addressed through Z2
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
331 mova m7, Z2(7)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
332 T4_SSE m6, m7, m0 ; T4 step on Z(6), Z(7); m0 was already stored to Z(0), so it is free to be the third operand
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
333 PASS_SMALL 0, [cos_16], [cos_16+16] ; final pass with the cos_16 table; NOTE(review): PASS_SMALL is defined outside this view - presumably it consumes m4-m7 from registers and Z(0..3) from memory and stores all outputs; confirm
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
334 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
335
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
336
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
337 INIT_MMX ; x264asm mode switch for the 3DNow! code that follows; NOTE(review): presumably makes m0-m7 the MMX registers and mmsize 8 - confirm in x86inc
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
338
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
339 %macro FFT48_3DN 1 ; %1 = label suffix; emits the leaf routines fft4%1 and fft8%1 (in: r0 = buffer, transformed in place)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
340 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
341 fft4%1: ; works on Z(0)..Z(3); r0 is not modified
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
342 T2_3DN m0, m1, Z(0), Z(1) ; T2_3DN is defined earlier in the file; NOTE(review): presumably loads Z(0), Z(1) and leaves a 2-point butterfly in m0/m1 - confirm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
343 mova m2, Z(2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
344 mova m3, Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
345 T4_3DN m0, m1, m2, m3, m4, m5 ; T4_3DN is defined earlier in the file; m4/m5 are overwritten by the PUNPCKs below, so they only act as temporaries here
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
346 PUNPCK m0, m1, m4 ; interleave dwords of m0/m1: low halves -> m0, high halves -> m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
347 PUNPCK m2, m3, m5 ; same for m2/m3: low halves -> m2, high halves -> m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
348 mova Z(0), m0 ; store the interleaved results in place
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
349 mova Z(1), m4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
350 mova Z(2), m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
351 mova Z(3), m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
352 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
353
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
354 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
355 fft8%1: ; works on Z(0)..Z(5), Z2(6), Z2(7); uses all eight registers m0-m7; r0 is not modified
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
356 T2_3DN m0, m1, Z(0), Z(1) ; first half: same T2_3DN + T4_3DN sequence as fft4%1 on Z(0)..Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
357 mova m2, Z(2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
358 mova m3, Z(3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
359 T4_3DN m0, m1, m2, m3, m4, m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
360 mova Z(0), m0 ; park m0/m2 in memory: both registers are reused as temporaries below and reloaded afterwards
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
361 mova Z(2), m2
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
362 T2_3DN m4, m5, Z(4), Z(5) ; second half: T2_3DN on Z(4)/Z(5) -> m4/m5
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
363 T2_3DN m6, m7, Z2(6), Z2(7) ; and on Z2(6)/Z2(7) -> m6/m7
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
364 pswapd m0, m5 ; m0 = m5 with its two dwords swapped (native instruction or the pswapd macro, depending on the instantiation)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
365 pswapd m2, m7 ; m2 = m7 with its two dwords swapped
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
366 pxor m0, [ps_m1p1] ; XOR with the ps_m1p1 constant (defined elsewhere); NOTE(review): presumably a sign-bit mask that negates one of the two floats - confirm
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
367 pxor m2, [ps_m1p1]
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
368 pfsub m5, m0 ; m5 -= swapped/xored copy of m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
369 pfadd m7, m2 ; m7 += swapped/xored copy of m7
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
370 pfmul m5, [ps_root2] ; scale by the ps_root2 constant (defined elsewhere); NOTE(review): presumably the sqrt(1/2) twiddle factor - confirm
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
371 pfmul m7, [ps_root2]
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
372 T4_3DN m1, m3, m5, m7, m0, m2 ; combine m1/m3 with the rotated m5/m7; m0/m2 are the temporaries (their values were parked above)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
373 mova Z(5), m5 ; spill m5/m7 to their output slots: the registers are needed as temporaries next and are read back from memory by the final PUNPCKs
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
374 mova Z2(7), m7
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
375 mova m0, Z(0) ; reload the parked m0/m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
376 mova m2, Z(2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
377 T4_3DN m0, m2, m4, m6, m5, m7 ; combine m0/m2 with m4/m6; m5/m7 are the temporaries
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
378 PUNPCK m0, m1, m5 ; interleave m0/m1: low halves -> m0, high halves -> m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
379 PUNPCK m2, m3, m7 ; interleave m2/m3: low halves -> m2, high halves -> m7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
380 mova Z(0), m0 ; store the first four output vectors
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
381 mova Z(1), m5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
382 mova Z(2), m2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
383 mova Z(3), m7
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
384 PUNPCK m4, Z(5), m5 ; interleave m4 with the value spilled to Z(5): low halves -> m4, high halves -> m5
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
385 PUNPCK m6, Z2(7), m7 ; interleave m6 with the value spilled to Z2(7): low halves -> m6, high halves -> m7
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
386 mova Z(4), m4 ; store the last four output vectors (overwriting the spill slots)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
387 mova Z(5), m5
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
388 mova Z2(6), m6
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
389 mova Z2(7), m7
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
390 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
391 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
392
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
393 FFT48_3DN _3dn2 ; emits fft4_3dn2 / fft8_3dn2; expanded before the pswapd macro below exists, so pswapd here is presumably assembled as the native instruction - confirm no earlier macro of that name
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
394
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
395 %macro pswapd 2 ; %1 = dst, %2 = src: emulates pswapd (dst = src with its two dwords swapped) using MMX instructions only; assumes 64-bit MMX registers
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
396 %ifidn %1, %2 ; in-place swap: no spare register, so bounce through memory
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
397 movd [r0+12], %1 ; store the low dword at [r0+12]; NOTE: this overwrites buffer data at that address
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
398 punpckhdq %1, [r0+8] ; %1 = { old high dword, dword at [r0+12] = old low dword }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
399 %else ; distinct registers: build the swap in %1, leaving %2 untouched
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
400 movq %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
401 psrlq %1, 32 ; move the high dword of src down to the low position
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
402 punpckldq %1, %2 ; %1 = { src high dword, src low dword }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
403 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
404 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
405
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
406 FFT48_3DN _3dn ; emits fft4_3dn / fft8_3dn; expanded after the pswapd macro above, so pswapd now expands to the MMX emulation
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
407
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
408
12518
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
409 %define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)] ; strided addressing for the pass loops: the even part of x (0, 2, 4) scales the stride o1q, the low bit of x selects the second vector of the pair
67e7e49058c2 Split and then simplify address generation macro.
reimar
parents: 12432
diff changeset
410 %define Z2(x) [zq + o3q + mmsize*(x&1)] ; used for x = 6, 7: a scale of 6 is not encodable in an x86 address, so the precomputed o3q (= 6*o1q, see DECL_PASS) is used as the base offset instead
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
411
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
412 %macro DECL_PASS 2+ ; name, payload: emits routine %1 that runs the payload %2 (rest of the line) once per loop iteration; in: z = buffer, w = table pointer, n = loop count
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
413 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
414 %1:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
415 DEFINE_ARGS z, w, n, o1, o3 ; name the registers: z, w, n are inputs; o1, o3 are byte offsets computed below
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
416 lea o3q, [nq*3] ; o3 = 3*n for now (multiplied by 16 below)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
417 lea o1q, [nq*8] ; o1 = 8*n bytes
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
418 shl o3q, 4 ; o3 = 48*n bytes = 6*o1, the offset used by Z2()
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
419 .loop:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
420 %2 ; payload: one iteration of the pass body supplied by the instantiation
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
421 add zq, mmsize*2 ; advance the buffer pointer by two vectors; z is left advanced on return
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
422 add wq, mmsize ; advance the table pointer by one vector
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
423 sub nd, mmsize/8 ; count down by mmsize/8 per iteration (2 with 16-byte vectors, 1 with 8-byte vectors)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
424 jg .loop ; repeat while the counter is still positive (signed)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
425 rep ret ; NOTE(review): rep-prefixed ret; presumably the usual workaround for a ret directly following a conditional branch on AMD CPUs - confirm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
426 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
427
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
428 INIT_XMM ; back to XMM mode for the SSE pass routines
10452
c6aa538c0bc3 s/movdqa/movaps/ in sse1 fft. (regression in r20293)
lorenm
parents: 10019
diff changeset
429 %define mova movaps ; redefined after INIT_XMM; NOTE(review): presumably because INIT_XMM resets mova to movdqa, which the changeset note identifies as a regression for the SSE1 code - confirm
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
430 DECL_PASS pass_sse, PASS_BIG 1 ; pass loop used by the plain fftN_sse routines; PASS_BIG is defined outside this view
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
431 DECL_PASS pass_interleave_sse, PASS_BIG 0 ; pass loop used by the fftN_interleave_sse routines; NOTE(review): the 0/1 argument presumably selects the output layout - confirm against PASS_BIG
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
432
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
433 INIT_MMX ; MMX mode for the 3DNow! pass routines
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
434 %define mulps pfmul ; alias the SSE mnemonics to their 3DNow!/MMX counterparts; NOTE(review): presumably so the PASS_* macros, written with SSE mnemonics, can be reused unchanged - confirm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
435 %define addps pfadd
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
436 %define subps pfsub
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
437 %define unpcklps punpckldq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
438 %define unpckhps punpckhdq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
439 DECL_PASS pass_3dn, PASS_SMALL 1, [wq], [wq+o1q] ; pass loop used by the plain fftN_3dn routines; the two table operands are taken at w and w+o1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
440 DECL_PASS pass_interleave_3dn, PASS_BIG 0 ; pass loop used by the fftN_interleave_3dn routines
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
441 %define pass_3dn2 pass_3dn ; the _3dn2 variants share the _3dn pass routines; only their fft4/fft8 leaves differ
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
442 %define pass_interleave_3dn2 pass_interleave_3dn
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
443
11060
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
444 %ifdef PIC ; SECTION_REL is appended to every dispatch-table entry
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
445 %define SECTION_REL - $$ ; PIC: store each entry as an offset from the section start ($$) instead of an absolute address
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
446 %else
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
447 %define SECTION_REL ; non-PIC: expands to nothing, so entries are plain addresses
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
448 %endif
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
449
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
450 %macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs. %1 = dispatch-table suffix, %2 = name of the argument register holding nbits; the buffer pointer is expected in r0 by the called routine
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
451 lea r2, [dispatch_tab%1] ; r2 = address of the table built by DECL_FFT
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
452 mov r2, [r2 + (%2q-2)*gprsize] ; fetch entry nbits-2: the table starts at fft4, i.e. nbits = 2
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
453 %ifdef PIC ; PIC tables hold section-relative offsets (see SECTION_REL), so rebase them
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
454 lea r3, [$$] ; r3 = start address of the current section
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
455 add r2, r3 ; r2 = absolute address of the selected routine
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
456 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
457 call r2 ; indirect call to the selected fftN routine
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
458 %endmacro ; FFT_DISPATCH
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
459
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
460 %macro DECL_FFT 2-3 ; nbits, cpu, suffix: generates fftN%3%2 for N = 2^nbits .. 2^16, the table dispatch_tab%3%2, and the entry point fft_dispatch%3%2
11060
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
461 %xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL ; the table always begins with the hand-written fft4/fft8 leaves (never the suffixed variant)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
462 %if %1==5 ; when generation starts at N = 32, fft16 is a hand-written leaf too and must be listed explicitly
11060
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
463 %xdefine list_of_fft list_of_fft, fft16%2 SECTION_REL
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
464 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
465
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
466 %assign n 1<<%1 ; n = size of the first generated transform
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
467 %rep 17-%1 ; one routine per power of two from 2^nbits up to 2^16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
468 %assign n2 n/2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
469 %assign n4 n/4
11060
daff45175333 Make the jump-table section-relative for x86_64 with PIC enabled.
reimar
parents: 10452
diff changeset
470 %xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2 SECTION_REL ; append this size to the dispatch list
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
471
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
472 align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
473 fft %+ n %+ %3%2: ; in: r0 = buffer; one n/2 and two n/4 sub-transforms followed by a combining pass. Returns with r0 advanced by 2*n bytes (the pass loop's total advance)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
474 call fft %+ n2 %+ %2 ; size-n/2 sub-transform at the buffer start; sub-transforms always use the non-suffixed variant
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
475 add r0, n*4 - (n&(-2<<%1)) ; move r0 to start+4n bytes. The mask term equals n when the callee was a generated routine (which returns with r0 already advanced by 2*n2 = n bytes) and 0 when it was a hand-written leaf (r0 unchanged)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
476 call fft %+ n4 %+ %2 ; first size-n/4 sub-transform, at start+4n
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
477 add r0, n*2 - (n2&(-2<<%1)) ; move r0 to start+6n bytes, again discounting the n2 bytes a generated callee has already advanced
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
478 call fft %+ n4 %+ %2 ; second size-n/4 sub-transform, at start+6n
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
479 sub r0, n*6 + (n2&(-2<<%1)) ; rewind r0 to the buffer start for the combining pass
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 11060
diff changeset
480 lea r1, [cos_ %+ n] ; r1 = w argument of the pass: the cos_N table
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
481 mov r2d, n4/2 ; r2 = n argument of the pass: loop count n/8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
482 jmp pass%3%2 ; tail call: the pass routine's ret returns to our caller; only this outermost pass uses the suffixed variant
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
483
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
484 %assign n n*2 ; next power of two
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
485 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
486 %undef n
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
487
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
488 align 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
489 dispatch_tab%3%2: pointer list_of_fft ; table indexed by nbits-2 in FFT_DISPATCH; NOTE(review): `pointer` is defined elsewhere, presumably emitting pointer-sized data - confirm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
490
8820
7400956a815d Put dispatch_tab in the rodata section for macho64.
astrange
parents: 8430
diff changeset
491 section .text
7400956a815d Put dispatch_tab in the rodata section for macho64.
astrange
parents: 8430
diff changeset
492
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
493 ; On x86_32, this function does the register saving and restoring for all of fft.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
494 ; The others pass args in registers and don't spill anything.
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8820
diff changeset
495 cglobal fft_dispatch%3%2, 2,5,8, z, nbits ; exported entry point with arguments z and nbits; the 2,5,8 match FFT_DISPATCH's "5 GPRs, 8 XMMs" note (x264asm order: args, GPRs, XMM registers)
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
496 FFT_DISPATCH %3%2, nbits ; look up and call the routine for 2^nbits points
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
497 RET
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
498 %endmacro ; DECL_FFT
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
499
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
500 DECL_FFT 5, _sse ; SSE: fft4/8/16 are hand-written, sizes 32..65536 are generated; also emits dispatch_tab_sse and fft_dispatch_sse
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
501 DECL_FFT 5, _sse, _interleave ; same sizes, finishing with pass_interleave_sse (fftN_interleave_sse, fft_dispatch_interleave_sse)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
502 DECL_FFT 4, _3dn ; 3DNow!: only fft4/8 are hand-written, so generation starts at 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
503 DECL_FFT 4, _3dn, _interleave
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
504 DECL_FFT 4, _3dn2 ; _3dn2 variant: own fft4/fft8 leaves, pass routines aliased to the _3dn ones
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
505 DECL_FFT 4, _3dn2, _interleave
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
506
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
507 INIT_XMM ; back to XMM mode for the SSE IMDCT code below
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
508 %undef mulps ; drop the 3DNow! aliases so these mnemonics assemble as real SSE instructions again
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
509 %undef addps
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
510 %undef subps
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
511 %undef unpcklps
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
512 %undef unpckhps
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
513
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
514 %macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8 -- out: xmm1 = low result pairs, xmm0 = high result pairs; clobbers xmm0-xmm5
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
515 movaps xmm0, [%3+%2*4] ; A = four input floats at index %2 (forward side)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
516 movaps xmm1, [%3+%1*4-0x10] ; B = four input floats ending just below index %1 (mirrored side)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
517 movaps xmm2, xmm0 ; keep a copy of A for the second shuffle
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
518 shufps xmm0, xmm1, 0x88 ; xmm0 = { A0, A2, B0, B2 } (even elements)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
519 shufps xmm1, xmm2, 0x77 ; xmm1 = { B3, B1, A3, A1 } (odd elements, in descending order)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
520 movlps xmm4, [%4+%2*2] ; xmm4 = table %4 (tcos): low half from the forward index ...
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
521 movlps xmm5, [%5+%2*2+0x0] ; xmm5 = table %5 (tsin): low half from the forward index ...
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
522 movhps xmm4, [%4+%1*2-0x8] ; ... high half from the mirrored index
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
523 movhps xmm5, [%5+%1*2-0x8]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
524 movaps xmm2, xmm0 ; copies for the cross products (call xmm0 = e, xmm1 = o)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
525 movaps xmm3, xmm1
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
526 mulps xmm0, xmm5 ; e*sin
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
527 mulps xmm1, xmm4 ; o*cos
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
528 mulps xmm2, xmm4 ; e*cos
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
529 mulps xmm3, xmm5 ; o*sin
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
530 subps xmm1, xmm0 ; xmm1 = o*cos - e*sin
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
531 addps xmm2, xmm3 ; xmm2 = e*cos + o*sin
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
532 movaps xmm0, xmm1 ; copy so both interleave halves can be produced
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
533 unpcklps xmm1, xmm2 ; xmm1 = pairs (xmm1[i], xmm2[i]) for i = 0, 1
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
534 unpckhps xmm0, xmm2 ; xmm0 = pairs (xmm1[i], xmm2[i]) for i = 2, 3
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
535 %endmacro
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
536
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
537 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 -- %1 = byte index, %2/%3 = output registers, %4 = data base, %5/%6 = the two tables (tcos/tsin as passed by POSROTATESHUF); clobbers xmm6, xmm7
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
538 movaps xmm6, [%4+%1*2] ; a = first vector at data offset 2*%1
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
539 movaps %2, [%4+%1*2+0x10] ; b = the following vector
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
540 movaps %3, xmm6 ; %3 = a
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
541 movaps xmm7, %2 ; xmm7 = b
12432
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
542 mulps xmm6, [%5+%1] ; a * table5[%1]
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
543 mulps %2, [%6+%1] ; b * table6[%1]
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
544 mulps %3, [%6+%1] ; a * table6[%1]
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
545 mulps xmm7, [%5+%1] ; b * table5[%1]
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
546 subps %2, xmm6 ; %2 = b*table6 - a*table5
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
547 addps %3, xmm7 ; %3 = a*table6 + b*table5
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
548 %endmacro
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
549
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
550 %macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 -- loop over index pairs (j counts up, k counts down): rotate the data at both indices with CMUL, then shuffle and store in place; clobbers xmm0-xmm7
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
551 .post:
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
552 CMUL %1, xmm0, xmm1, %3, %4, %5 ; rotation for index j -> xmm0, xmm1
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
553 CMUL %2, xmm4, xmm5, %3, %4, %5 ; rotation for index k -> xmm4, xmm5
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
554 shufps xmm1, xmm1, 0x1b ; reverse the order of the four floats in xmm1
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
555 shufps xmm5, xmm5, 0x1b ; reverse the order of the four floats in xmm5
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
556 movaps xmm6, xmm4 ; copy so both interleave halves of (xmm4, xmm1) can be produced
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
557 unpckhps xmm4, xmm1 ; xmm4 = high halves of xmm4 and reversed xmm1, interleaved
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
558 unpcklps xmm6, xmm1 ; xmm6 = low halves of xmm4 and reversed xmm1, interleaved
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
559 movaps xmm2, xmm0 ; same for (xmm0, xmm5)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
560 unpcklps xmm0, xmm5 ; xmm0 = low halves of xmm0 and reversed xmm5, interleaved
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
561 unpckhps xmm2, xmm5 ; xmm2 = high halves of xmm0 and reversed xmm5, interleaved
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
562 movaps [%3+%2*2], xmm6 ; store the two vectors at the k position
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
563 movaps [%3+%2*2+0x10], xmm4
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
564 movaps [%3+%1*2], xmm0 ; store the two vectors at the j position
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
565 movaps [%3+%1*2+0x10], xmm2
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
566 sub %2, 0x10 ; k moves down by 16
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
567 add %1, 0x10 ; j moves up by 16; this add sets the flags tested by the branch
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
568 jl .post ; keep looping while j is still negative (signed)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
569 %endmacro
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
570
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
571 cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input -- used below as r0 = s, r1 = output, r2 = input; three phases: .pre loop (PREROTATER + scatter through revtab), FFT_DISPATCH on output, POSROTATESHUF on output
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
572 %ifdef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
573 %define rrevtab r10 ; x86-64: each of the three table pointers gets its own register
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
574 %define rtcos r11
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
575 %define rtsin r12
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
576 push r12 ; r12-r14 are callee-saved: r12 is rtsin, r13/r14 hold revtab entries in the .pre loop
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
577 push r13
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
578 push r14
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
579 %else
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
580 %define rrevtab r6 ; x86-32: rrevtab and rtsin share r6, so the pointers are also spilled to the stack below
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
581 %define rtsin r6
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
582 %define rtcos r5
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
583 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
584 mov r3d, [r0+FFTContext.mdctsize] ; r3 = s->mdctsize
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
585 add r2, r3 ; input pointer advanced by mdctsize (used as a byte offset)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
586 shr r3, 1 ; r3 = mdctsize/2
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
587 mov rtcos, [r0+FFTContext.tcos]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
588 mov rtsin, [r0+FFTContext.tsin]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
589 add rtcos, r3 ; both table pointers advanced by mdctsize/2 bytes
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
590 add rtsin, r3
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
591 %ifndef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
592 push rtcos ; x86-32 spill; sits at [esp+8] once all three pushes are done
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
593 push rtsin ; sits at [esp+4] once all three pushes are done
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
594 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
595 shr r3, 1 ; r3 = mdctsize/4
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
596 mov rrevtab, [r0+FFTContext.revtab] ; on x86-32 this overwrites r6, whose rtsin value was spilled just above
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
597 add rrevtab, r3 ; revtab pointer advanced by mdctsize/4 bytes
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
598 %ifndef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
599 push rrevtab ; x86-32 spill; sits at [esp]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
600 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
601
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
602 sub r3, 4 ; r3 = mdctsize/4 - 4: the descending offset of the .pre loop
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
603 %ifdef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
604 xor r4, r4
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
605 sub r4, r3 ; r4 = -r3: the ascending counterpart, kept in step by the add at the end of the loop body
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
606 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
607 .pre: ; one iteration per step of 4 in r3, until r3 goes negative
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
608 %ifndef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
609 ;unspill: r4, r5 and r6 are all overwritten by the revtab lookups at the end of each iteration, so rebuild them here
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
610 xor r4, r4
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
611 sub r4, r3 ; r4 = -r3
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
612 mov rtsin, [esp+4]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
613 mov rtcos, [esp+8]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
614 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
615
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
616 PREROTATER r4, r3, r2, rtcos, rtsin ; defined outside this excerpt; the stores below consume xmm0 and xmm1, presumably its results -- TODO confirm
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
617 %ifdef ARCH_X86_64
12432
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
618 movzx r5, word [rrevtab+r4-4] ; four 16-bit revtab entries: two below rrevtab+r4 ...
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
619 movzx r6, word [rrevtab+r4-2]
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
620 movzx r13, word [rrevtab+r3] ; ... and two starting at rrevtab+r3
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
621 movzx r14, word [rrevtab+r3+2]
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
622 movlps [r1+r5 *8], xmm0 ; low 8 bytes of xmm0 -> output + entry*8
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
623 movhps [r1+r6 *8], xmm0 ; high 8 bytes of xmm0
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
624 movlps [r1+r13*8], xmm1 ; low 8 bytes of xmm1
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
625 movhps [r1+r14*8], xmm1 ; high 8 bytes of xmm1
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
626 add r4, 4 ; keeps r4 == -r3 once r3 is decremented by 4 below
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
627 %else
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
628 mov r6, [esp] ; reload the revtab pointer (r6 was rtsin for PREROTATER)
12432
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
629 movzx r5, word [r6+r4-4] ; r5 (rtcos) becomes scratch for a revtab entry
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
630 movzx r4, word [r6+r4-2] ; overwrites r4 itself, which is why .pre recomputes it from r3
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
631 movlps [r1+r5*8], xmm0 ; low 8 bytes of xmm0 -> output + entry*8
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
632 movhps [r1+r4*8], xmm0 ; high 8 bytes of xmm0
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
633 movzx r5, word [r6+r3] ; second pair of entries, starting at revtab+r3
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
634 movzx r4, word [r6+r3+2]
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
635 movlps [r1+r5*8], xmm1 ; low 8 bytes of xmm1
f61e22f8cf28 cosmetics in imdct_sse
lorenm
parents: 12399
diff changeset
636 movhps [r1+r4*8], xmm1 ; high 8 bytes of xmm1
12399
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
637 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
638 sub r3, 4
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
639 jns .pre ; loop while r3 is still >= 0 after the decrement
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
640
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
641 mov r5, r0 ; r5 = s (context pointer, read again after FFT_DISPATCH)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
642 mov r6, r1 ; r6 = output (read again after FFT_DISPATCH)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
643 mov r0, r1 ; r0 = output
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
644 mov r1d, [r5+FFTContext.nbits] ; r1 = s->nbits
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
645
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
646 FFT_DISPATCH _sse, r1 ; defined outside this excerpt; NOTE(review): the code below reads r5 and r6 (and on x86-64 rtcos/rtsin) afterwards, so it must preserve them -- confirm
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
647
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
648 mov r0d, [r5+FFTContext.mdctsize] ; r0 = s->mdctsize, reloaded
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
649 add r6, r0 ; r6 = output + mdctsize
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
650 shr r0, 1 ; r0 = mdctsize/2
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
651 %ifndef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
652 %define rtcos r2 ; x86-32: rebind the table names to r2/r3, which are not read again below under their old roles
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
653 %define rtsin r3
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
654 mov rtcos, [esp+8] ; reload the spilled tcos/tsin pointers
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
655 mov rtsin, [esp+4]
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
656 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
657 neg r0 ; r0 = -mdctsize/2 (j: starts negative, counts up)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
658 mov r1, -16
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
659 sub r1, r0 ; r1 = mdctsize/2 - 16 (k: counts down)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
660 POSROTATESHUF r0, r1, r6, rtcos, rtsin ; j = r0, k = r1, z = r6; loops until j is no longer negative
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
661 %ifdef ARCH_X86_64
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
662 pop r14 ; restore callee-saved registers in reverse push order
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
663 pop r13
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
664 pop r12
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
665 %else
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
666 add esp, 12 ; discard the three spilled pointers (revtab, tsin, tcos)
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
667 %endif
020540442072 Convert ff_imdct_half_sse() to yasm.
alexc
parents: 12188
diff changeset
668 RET