Mercurial > libavcodec.hg
annotate x86/x86util.asm @ 11980:263b4ef7ad87 libavcodec
tablegen: implement and use WRITE_ARRAY macros
Two macros, WRITE_ARRAY and WRITE_ARRAY_2D, take the prefix (the modifiers:
not all tables are static, and they might not be constant either), the
type, and the name of the array. The array is written out with the same name
and type, and with the correct size of the currently defined object.
author | flameeyes |
---|---|
date | Sun, 27 Jun 2010 12:21:12 +0000 |
parents | 980030a3e315 |
children | 88563eada57f |
rev | line source |
---|---|
8510 | 1 ;***************************************************************************** |
8804
ba83a0c57e9f
Fix wrong file name in header, noticed by David DeHaven, dave sagetv com.
diego
parents:
8510
diff
changeset
|
2 ;* x86util.asm |
8510 | 3 ;***************************************************************************** |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
4 ;* Copyright (C) 2008 x264 project |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
5 ;* |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
6 ;* Authors: Holger Lubitz <holger@lubitz.org> |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
7 ;* Loren Merritt <lorenm@u.washington.edu> |
8510 | 8 ;* |
9 ;* This program is free software; you can redistribute it and/or modify | |
10 ;* it under the terms of the GNU General Public License as published by | |
11 ;* the Free Software Foundation; either version 2 of the License, or | |
12 ;* (at your option) any later version. | |
13 ;* | |
14 ;* This program is distributed in the hope that it will be useful, | |
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 ;* GNU General Public License for more details. | |
18 ;* | |
19 ;* You should have received a copy of the GNU General Public License | |
20 ;* along with this program; if not, write to the Free Software | |
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
22 ;***************************************************************************** | |
23 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
; Stride constants brought in with the x264asm headers.
; NOTE(review): presumably the row strides of x264's encode/decode scratch
; buffers; nothing in the visible part of this file reads them - confirm users.
%assign FENC_STRIDE 16
%assign FDEC_STRIDE 32
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
26 |
;-----------------------------------------------------------------------------
; SBUTTERFLY suffix, a, b, tmp
;   %1     = punpck size suffix (this file passes wd, dq and qdq)
;   %2, %3 = register numbers of the two inputs
;   %4     = register number of a scratch register
; m%2 receives the low interleave of (a, b); the scratch register receives the
; high interleave. The closing SWAP exchanges the names %3 and %4, so callers
; see the high interleave as m%3 and the unmodified old b as m%4.
; NOTE(review): mova, m<N> and SWAP come from the x264asm layer, which is not
; part of this file - SWAP is assumed to rename registers without emitting code.
;-----------------------------------------------------------------------------
%macro SBUTTERFLY 4
    mova        m%4, m%2        ; tmp = a
    punpckl%1   m%2, m%3        ; a   = low  interleave(a, b)
    punpckh%1   m%4, m%3        ; tmp = high interleave(a, b)
    SWAP        %3, %4          ; expose the high half under b's name
%endmacro
33 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
;-----------------------------------------------------------------------------
; SBUTTERFLY2 suffix, a, b, tmp
; Same parameters as SBUTTERFLY, but the high interleave is computed in place
; in m%2 and the low interleave in the scratch copy; a three-way SWAP then
; rotates the register names.
; NOTE(review): the final naming depends on how x264asm's SWAP treats a
; three-name chain (defined outside this file). If it behaves as
; "SWAP %2,%4 then SWAP %4,%3", the result is %2 = low, %3 = high and
; %4 = old b - confirm against x86inc.asm.
;-----------------------------------------------------------------------------
%macro SBUTTERFLY2 4
    mova        m%4, m%2        ; tmp = a
    punpckh%1   m%2, m%3        ; a   = high interleave(a, b)
    punpckl%1   m%4, m%3        ; tmp = low  interleave(a, b)
    SWAP        %2, %4, %3      ; rotate the three register names
%endmacro
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
40 |
;-----------------------------------------------------------------------------
; TRANSPOSE4x4W r0, r1, r2, r3, tmp
;   %1-%4 = register numbers of the four rows, %5 = scratch register number
; 4x4 word transpose (per the macro name) built from two interleave stages:
; word->dword on the row pairs (%1,%2) and (%3,%4), then dword->qword on
; (%1,%3) and (%2,%4). The last SWAP exchanges the names %2 and %3.
;-----------------------------------------------------------------------------
%macro TRANSPOSE4x4W 5
    ; stage 1: interleave words
    SBUTTERFLY  wd, %1, %2, %5
    SBUTTERFLY  wd, %3, %4, %5
    ; stage 2: interleave dwords
    SBUTTERFLY  dq, %1, %3, %5
    SBUTTERFLY  dq, %2, %4, %5
    SWAP        %2, %3
%endmacro
48 | |
;-----------------------------------------------------------------------------
; TRANSPOSE2x4x4W r0, r1, r2, r3, tmp
;   %1-%4 = register numbers of the rows, %5 = scratch register number
; Three interleave stages: word->dword, dword->qword, then qword->dqword on the
; pairs (%1,%2) and (%3,%4). No extra SWAP is needed after the last stage.
; NOTE(review): the name suggests two 4x4 word blocks transposed side by side
; in 128-bit registers - confirm with callers.
;-----------------------------------------------------------------------------
%macro TRANSPOSE2x4x4W 5
    ; stage 1: interleave words
    SBUTTERFLY  wd,  %1, %2, %5
    SBUTTERFLY  wd,  %3, %4, %5
    ; stage 2: interleave dwords
    SBUTTERFLY  dq,  %1, %3, %5
    SBUTTERFLY  dq,  %2, %4, %5
    ; stage 3: interleave qwords
    SBUTTERFLY  qdq, %1, %2, %5
    SBUTTERFLY  qdq, %3, %4, %5
%endmacro
57 | |
;-----------------------------------------------------------------------------
; TRANSPOSE4x4D r0, r1, r2, r3, tmp
;   %1-%4 = register numbers of the four rows, %5 = scratch register number
; Dword counterpart of the 4x4 word transpose: dword->qword interleave on
; (%1,%2) and (%3,%4), qword->dqword interleave on (%1,%3) and (%2,%4), then
; the names %2 and %3 are exchanged.
;-----------------------------------------------------------------------------
%macro TRANSPOSE4x4D 5
    ; stage 1: interleave dwords
    SBUTTERFLY  dq,  %1, %2, %5
    SBUTTERFLY  dq,  %3, %4, %5
    ; stage 2: interleave qwords
    SBUTTERFLY  qdq, %1, %3, %5
    SBUTTERFLY  qdq, %2, %4, %5
    SWAP        %2, %3
%endmacro
65 | |
;-----------------------------------------------------------------------------
; TRANSPOSE8x8W r0..r7, t0 [, t1 [, flag]]
;   %1-%8 = register numbers of the eight rows
; ARCH_X86_64 defined:
;   %9 is the number of a spare register; three interleave stages run entirely
;   in registers, followed by two name swaps.
; ARCH_X86_64 not defined:
;   %9 and %10 are used as complete operands of movdqa (spill slots), and the
;   row registers themselves double as SBUTTERFLY scratch. Passing an 11th
;   argument changes the in/out convention as described inside the %else branch.
;-----------------------------------------------------------------------------
%macro TRANSPOSE8x8W 9-11
%ifdef ARCH_X86_64
    ; stage 1: words -> dwords
    SBUTTERFLY  wd,  %1, %2, %9
    SBUTTERFLY  wd,  %3, %4, %9
    SBUTTERFLY  wd,  %5, %6, %9
    SBUTTERFLY  wd,  %7, %8, %9
    ; stage 2: dwords -> qwords
    SBUTTERFLY  dq,  %1, %3, %9
    SBUTTERFLY  dq,  %2, %4, %9
    SBUTTERFLY  dq,  %5, %7, %9
    SBUTTERFLY  dq,  %6, %8, %9
    ; stage 3: qwords -> dqwords
    SBUTTERFLY  qdq, %1, %5, %9
    SBUTTERFLY  qdq, %2, %6, %9
    SBUTTERFLY  qdq, %3, %7, %9
    SBUTTERFLY  qdq, %4, %8, %9
    SWAP        %2, %5
    SWAP        %4, %7
%else
; in:  m0..m7, unless %11 in which case m6 is in %9
; out: m0..m7, unless %11 in which case m4 is in %10
; spills into %9 and %10
%if %0 < 11
    movdqa      %9, m%7             ; free m%7 so it can serve as scratch
%endif
    SBUTTERFLY  wd,  %1, %2, %7
    movdqa      %10, m%2            ; park m%2, which becomes the next scratch
    movdqa      m%7, %9             ; bring the spilled row back
    SBUTTERFLY  wd,  %3, %4, %2
    SBUTTERFLY  wd,  %5, %6, %2
    SBUTTERFLY  wd,  %7, %8, %2
    SBUTTERFLY  dq,  %1, %3, %2
    movdqa      %9, m%3             ; park m%3, which becomes the next scratch
    movdqa      m%2, %10
    SBUTTERFLY  dq,  %2, %4, %3
    SBUTTERFLY  dq,  %5, %7, %3
    SBUTTERFLY  dq,  %6, %8, %3
    SBUTTERFLY  qdq, %1, %5, %3
    SBUTTERFLY  qdq, %2, %6, %3
    movdqa      %10, m%2            ; park m%2 again for the last two butterflies
    movdqa      m%3, %9
    SBUTTERFLY  qdq, %3, %7, %2
    SBUTTERFLY  qdq, %4, %8, %2
    SWAP        %2, %5
    SWAP        %4, %7
%if %0 < 11
    movdqa      m%5, %10            ; reload the row still sitting in the spill slot
%endif
%endif
%endmacro
114 | |
;-----------------------------------------------------------------------------
; ABS1_MMX a, tmp
; Per-word absolute value without pabsw: a = max(a, 0 - a) using signed words.
; %2 is overwritten. psubw wraps, so a lane holding -32768 stays -32768.
;-----------------------------------------------------------------------------
%macro ABS1_MMX 2 ; a, tmp
    pxor    %2, %2      ; tmp = 0
    psubw   %2, %1      ; tmp = -a
    pmaxsw  %1, %2      ; a   = max(a, -a)
%endmacro
120 | |
;-----------------------------------------------------------------------------
; ABS2_MMX a, b, tmp0, tmp1
; Per-word absolute value of two registers at once, as max(x, 0 - x) on signed
; words. The two computations are interleaved instruction by instruction;
; %3 and %4 are overwritten.
;-----------------------------------------------------------------------------
%macro ABS2_MMX 4 ; a, b, tmp0, tmp1
    pxor    %3, %3      ; tmp0 = 0
    pxor    %4, %4      ; tmp1 = 0
    psubw   %3, %1      ; tmp0 = -a
    psubw   %4, %2      ; tmp1 = -b
    pmaxsw  %1, %3      ; a = max(a, -a)
    pmaxsw  %2, %4      ; b = max(b, -b)
%endmacro
129 | |
;-----------------------------------------------------------------------------
; ABS1_SSSE3 a, tmp
; Per-word absolute value via pabsw. %2 is accepted but never referenced, which
; keeps the argument list identical to ABS1_MMX.
;-----------------------------------------------------------------------------
%macro ABS1_SSSE3 2
    pabsw   %1, %1
%endmacro
133 | |
;-----------------------------------------------------------------------------
; ABS2_SSSE3 a, b, tmp0, tmp1
; Per-word absolute value of two registers via pabsw. %3 and %4 are accepted
; but never referenced, which keeps the argument list identical to ABS2_MMX.
;-----------------------------------------------------------------------------
%macro ABS2_SSSE3 4
    pabsw   %1, %1
    pabsw   %2, %2
%endmacro
138 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
;-----------------------------------------------------------------------------
; ABSB_MMX a, tmp
; Per-byte absolute value without pabsb: a = unsigned min(a, 0 - a).
; For a non-negative byte the negation is the larger unsigned value, for a
; negative byte it is the smaller one, so the minimum is |a| either way.
; %2 is overwritten.
;-----------------------------------------------------------------------------
%macro ABSB_MMX 2
    pxor    %2, %2      ; tmp = 0
    psubb   %2, %1      ; tmp = -a
    pminub  %1, %2      ; a   = min_unsigned(a, -a)
%endmacro
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
144 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
;-----------------------------------------------------------------------------
; ABSB2_MMX a, b, tmp0, tmp1
; Per-byte absolute value of two registers, each computed as the unsigned
; minimum of x and 0 - x. The two computations are interleaved; %3 and %4 are
; overwritten.
;-----------------------------------------------------------------------------
%macro ABSB2_MMX 4
    pxor    %3, %3      ; tmp0 = 0
    pxor    %4, %4      ; tmp1 = 0
    psubb   %3, %1      ; tmp0 = -a
    psubb   %4, %2      ; tmp1 = -b
    pminub  %1, %3      ; a = min_unsigned(a, -a)
    pminub  %2, %4      ; b = min_unsigned(b, -b)
%endmacro
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
153 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
;-----------------------------------------------------------------------------
; ABSB_SSSE3 a, tmp
; Per-byte absolute value via pabsb. %2 is accepted but never referenced, which
; keeps the argument list identical to ABSB_MMX.
;-----------------------------------------------------------------------------
%macro ABSB_SSSE3 2
    pabsb   %1, %1
%endmacro
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
157 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
;-----------------------------------------------------------------------------
; ABSB2_SSSE3 a, b, tmp0, tmp1
; Per-byte absolute value of two registers via pabsb. %3 and %4 are accepted
; but never referenced, which keeps the argument list identical to ABSB2_MMX.
;-----------------------------------------------------------------------------
%macro ABSB2_SSSE3 4
    pabsb   %1, %1
    pabsb   %2, %2
%endmacro
8510 | 162 |
;-----------------------------------------------------------------------------
; ABS4 a, b, c, d, tmp0, tmp1
; Absolute value of four registers, done as two ABS2 invocations that share the
; same pair of scratch registers. ABS2 is resolved when ABS4 is expanded, so
; whichever implementation ABS2 is %define'd to at that point is used.
;-----------------------------------------------------------------------------
%macro ABS4 6
    ABS2    %1, %2, %5, %6
    ABS2    %3, %4, %5, %6
%endmacro
167 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
; Default bindings for the generic ABS* names: the MMX-style implementations.
; NOTE(review): files that include this one presumably re-%define these to the
; *_SSSE3 variants before instantiating SSSE3 code - confirm with includers.
%define ABS1  ABS1_MMX
%define ABS2  ABS2_MMX
%define ABSB  ABSB_MMX
%define ABSB2 ABSB2_MMX
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
172 |
;-----------------------------------------------------------------------------
; SPLATB_MMX dst, addr, unused
; Broadcasts the byte stored at [%2] to every byte lane of %1.
; The 4-byte load starts at %2-3, so the wanted byte arrives as byte 3 of the
; dword. punpcklbw duplicates each byte, leaving the wanted byte in both halves
; of word 3, which the word shuffle (0xff) copies to all four low words.
; With mmsize==16 the low qword is then replicated into the high qword.
; %3 is not referenced here; SPLATB_SSSE3 uses it as its shuffle mask.
;-----------------------------------------------------------------------------
%macro SPLATB_MMX 3
    movd        %1, [%2-3] ;to avoid crossing a cacheline
    punpcklbw   %1, %1
%if mmsize == 16
    pshuflw     %1, %1, 0xff
    punpcklqdq  %1, %1
%else
    pshufw      %1, %1, 0xff
%endif
%endmacro
183 | |
;-----------------------------------------------------------------------------
; SPLATB_SSSE3 dst, addr, shuffle_mask
; Loads four bytes starting at %2-3 (the byte at [%2] becomes byte 3 of the
; dword) and permutes them with pshufb using %3.
; NOTE(review): to match SPLATB_MMX, %3 must select byte 3 for every lane; the
; mask is supplied by the caller and is not visible here.
;-----------------------------------------------------------------------------
%macro SPLATB_SSSE3 3
    movd    %1, [%2-3]
    pshufb  %1, %3
%endmacro
188 | |
;-----------------------------------------------------------------------------
; PALIGNR_MMX dst, src, imm, tmp
; Shift-and-or replacement for palignr:
;     dst = (dst << (mmsize - imm) bytes) | (src >> imm bytes)
; %2 is first copied into %4, unless the two are textually identical, in which
; case the copy is skipped and %2 itself is shifted (and therefore destroyed).
; mmsize==8 uses the bit-granular qword shifts; otherwise the byte-granular
; 128-bit shifts are used.
;-----------------------------------------------------------------------------
%macro PALIGNR_MMX 4
%ifnidn %4, %2
    mova    %4, %2              ; work on a copy of src
%endif
%if mmsize == 8
    psllq   %1, (8-%3)*8
    psrlq   %4, %3*8
%else
    pslldq  %1, 16-%3
    psrldq  %4, %3
%endif
    por     %1, %4              ; merge the two shifted halves
%endmacro
202 | |
;-----------------------------------------------------------------------------
; PALIGNR_SSSE3 dst, src, imm, tmp
; Thin wrapper around the native palignr instruction. %4 is accepted but never
; referenced, which keeps the argument list identical to PALIGNR_MMX.
;-----------------------------------------------------------------------------
%macro PALIGNR_SSSE3 4
    palignr %1, %2, %3
%endmacro
206 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; DEINTB mask0, reg0, mask1, reg1, src
;   %1, %3 = register numbers that receive the mask and then the masked data
;   %2, %4 = register numbers of the two data registers
;   %5     = where the mask comes from: a register number (used as m%5) or any
;            other operand, which is loaded once into m%1 and copied to m%3
; On exit m%1 = m%2 AND mask and m%3 = m%4 AND mask, while m%2 and m%4 have
; each 16-bit lane shifted right by 8 bits.
; NOTE(review): with a 0x00ff-per-word mask this splits even and odd bytes; the
; mask value itself is supplied by the caller.
;-----------------------------------------------------------------------------
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5
    mova    m%1, m%5
    mova    m%3, m%5
%else
    mova    m%1, %5
    mova    m%3, m%1
%endif
    pand    m%1, m%2 ; dst .. y6 .. y4
    pand    m%3, m%4 ; src .. y6 .. y4
    psrlw   m%2, 8   ; dst .. y7 .. y5
    psrlw   m%4, 8   ; src .. y7 .. y5
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
220 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; SUMSUB_BA a, b [, tmp]
; Word butterfly. On exit: %1 = a + b, %2 = b - a.
;   two arguments  : no scratch; b - a is formed as 2*b - (a + b)
;   three arguments: the original a is kept in %3 and subtracted directly
;-----------------------------------------------------------------------------
%macro SUMSUB_BA 2-3
%if %0 == 2
    paddw   %1, %2      ; a = a + b
    paddw   %2, %2      ; b = 2b
    psubw   %2, %1      ; b = 2b - (a + b) = b - a
%else
    mova    %3, %1      ; tmp = a
    paddw   %1, %2      ; a = a + b
    psubw   %2, %3      ; b = b - a
%endif
%endmacro
232 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; SUMSUB_BADC a, b, c, d [, tmp]
; Two word butterflies. On exit:
;     %1 = a + b, %2 = b - a, %3 = c + d, %4 = d - c
;   five arguments: two SUMSUB_BA invocations sharing the scratch %5
;   four arguments: the scratch-free sequence, with both butterflies
;                   interleaved instruction by instruction
;-----------------------------------------------------------------------------
%macro SUMSUB_BADC 4-5
%if %0 == 5
    SUMSUB_BA %1, %2, %5
    SUMSUB_BA %3, %4, %5
%else
    paddw   %1, %2      ; a = a + b
    paddw   %3, %4      ; c = c + d
    paddw   %2, %2      ; b = 2b
    paddw   %4, %4      ; d = 2d
    psubw   %2, %1      ; b = b - a
    psubw   %4, %3      ; d = d - c
%endif
%endmacro
246 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD4_V a, b, c, d [, tmp]
; Two butterfly stages over four registers: first the pairs (a,b) and (c,d),
; then the pairs (a,c) and (b,d).
; The parameter count is declared as "4+", so anything after the fourth comma
; stays attached to %4; an optional scratch operand therefore reaches
; SUMSUB_BADC as its fifth argument.
;-----------------------------------------------------------------------------
%macro HADAMARD4_V 4+
    SUMSUB_BADC %1, %2, %3, %4      ; stage 1: (a,b) (c,d)
    SUMSUB_BADC %1, %3, %2, %4      ; stage 2: (a,c) (b,d)
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
251 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD8_V r0, r1, r2, r3, r4, r5, r6, r7 [, tmp]
; Three butterfly stages over eight registers, pairing registers whose position
; differs by 1, then by 2, then by 4.
; The parameter count is declared as "8+", so anything after the eighth comma
; stays attached to %8; an optional scratch operand therefore reaches
; SUMSUB_BADC as its fifth argument.
;-----------------------------------------------------------------------------
%macro HADAMARD8_V 8+
    ; stage 1: neighbours
    SUMSUB_BADC %1, %2, %3, %4
    SUMSUB_BADC %5, %6, %7, %8
    ; stage 2: distance 2
    SUMSUB_BADC %1, %3, %2, %4
    SUMSUB_BADC %5, %7, %6, %8
    ; stage 3: distance 4
    SUMSUB_BADC %1, %5, %2, %6
    SUMSUB_BADC %3, %7, %4, %8
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
260 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; TRANS_SSE2 width, order, a, b, tmp0 [, tmp1]
; Both code paths leave the x5x1 combination under the name %3 and the x4x0
; combination under the name %4 (lane labels as in the inline comments).
;
; The selected mask operand and shift count are stored in the single-line
; macros "mask" and "shift". These are ordinary %defines, so they stay defined
; after the macro has expanded; when %1 is neither d nor q nothing is
; (re)defined and whatever values exist at that point are used.
; mask_10 and mask_1100 are symbols defined outside this file.
;-----------------------------------------------------------------------------
%macro TRANS_SSE2 5-6
; TRANSPOSE2x2
; %1: transpose width (d/q) - use SBUTTERFLY qdq for dq
; %2: ord/unord (for compat with sse4, unused)
; %3/%4: source regs
; %5/%6: tmp regs
%ifidn %1, d
    %define mask [mask_10]
    %define shift 16
%elifidn %1, q
    %define mask [mask_1100]
    %define shift 32
%endif
%if %0 == 6 ; less dependency if we have two tmp
    mova    m%5, mask   ; ff00
    mova    m%6, m%4    ; x5x4
    psll%1  m%4, shift  ; x4..
    pand    m%6, m%5    ; x5..
    pandn   m%5, m%3    ; ..x0
    psrl%1  m%3, shift  ; ..x1
    por     m%4, m%5    ; x4x0
    por     m%3, m%6    ; x5x1
%else ; more dependency, one insn less. sometimes faster, sometimes not
    mova    m%5, m%4    ; x5x4
    psll%1  m%4, shift  ; x4..
    pxor    m%4, m%3    ; (x4^x1)x0
    pand    m%4, mask   ; (x4^x1)..
    pxor    m%3, m%4    ; x4x0
    psrl%1  m%4, shift  ; ..(x1^x4)
    pxor    m%5, m%4    ; x5x1
    SWAP    %4, %3, %5
%endif
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
294 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; TRANS_SSE4 width, order, a, b, tmp0 [, tmp1]
;   %1    = transpose width, d or q (any other value emits nothing)
;   %2    = ord / unord
;   %3,%4 = register numbers of the two sources
;   %5    = scratch register number; %6 is accepted but never referenced
; width d: word blends (pblendw) combined with 16-bit shifts. With "ord" both
;          outputs are produced by blends; otherwise the second output is
;          assembled with a shift and por, and the first one skips its shift.
; width q: two shufps selections followed by a name swap of %4 and %5.
;-----------------------------------------------------------------------------
%macro TRANS_SSE4 5-6 ; see above
%ifidn %1, d
    mova    m%5, m%3                ; keep a copy of the first source
%ifidn %2, ord
    psrl%1  m%3, 16
%endif
    pblendw m%3, m%4, 10101010b     ; odd words come from the second source
    psll%1  m%4, 16
%ifidn %2, ord
    pblendw m%4, m%5, 01010101b     ; even words come from the saved copy
%else
    psrl%1  m%5, 16
    por     m%4, m%5
%endif
%elifidn %1, q
    mova    m%5, m%3                ; keep a copy of the first source
    shufps  m%3, m%4, 10001000b     ; dwords 0 and 2 of each source
    shufps  m%5, m%4, 11011101b     ; dwords 1 and 3 of each source
    SWAP    %4, %5
%endif
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
316 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD distance, op, a, b, tmp0 [, tmp1]
; One transform step on the register pair (m%3, m%4).
;   1. When %1 is non-zero the lanes are first regrouped so that the elements
;      to be combined sit in matching lanes of the two registers:
;        %1 == 1 -> TRANS d
;        %1 == 2 -> SBUTTERFLY dq when mmsize == 8, TRANS q otherwise
;        %1 == 4 -> SBUTTERFLY qdq
;      Any other non-zero distance performs no regrouping.
;   2. The operation is applied:
;        sumsub     -> SUMSUB_BA m%3, m%4, m%5
;        amax       -> absolute values first (ABS2 when a second scratch was
;                      passed, two ABS1 otherwise), then pmaxsw
;        other text -> pmaxsw only
; ORDER is a plain %define and remains defined after expansion.
; NOTE(review): TRANS is not defined in the visible part of this file; it is
; presumably bound to TRANS_SSE2 or TRANS_SSE4 by the including file.
;-----------------------------------------------------------------------------
%macro HADAMARD 5-6
; %1=distance in words (0 for vertical pass, 1/2/4 for horizontal passes)
; %2=sumsub/max/amax (sum and diff / maximum / maximum of absolutes)
; %3/%4: regs
; %5(%6): tmpregs
%if %1 != 0 ; have to reorder stuff for horizontal op
    %ifidn %2, sumsub
        %define ORDER ord
        ; sumsub needs order because a-b != b-a unless a=b
    %else
        %define ORDER unord
        ; if we just max, order doesn't matter (allows pblendw+or in sse4)
    %endif
    %if %1 == 1
        TRANS d, ORDER, %3, %4, %5, %6
    %elif %1 == 2
        %if mmsize == 8
            SBUTTERFLY dq, %3, %4, %5
        %else
            TRANS q, ORDER, %3, %4, %5, %6
        %endif
    %elif %1 == 4
        SBUTTERFLY qdq, %3, %4, %5
    %endif
%endif
%ifidn %2, sumsub
    SUMSUB_BA m%3, m%4, m%5
%else
    %ifidn %2, amax
        %if %0 == 6
            ABS2 m%3, m%4, m%5, m%6
        %else
            ABS1 m%3, m%5
            ABS1 m%4, m%5
        %endif
    %endif
    pmaxsw m%3, m%4
%endif
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
356 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
357 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD2_2D a, b, c, d, tmp, width [, op_or_tmp2]
;   %1-%4 = register numbers, processed as the pairs (%1,%2) and (%3,%4)
;   %5    = scratch register number
;   %6    = SBUTTERFLY size suffix used to regroup each pair
;   %7    = final operation, default sumsub. If %7 is a number it is taken as
;           a second scratch register and the final operation becomes amax.
; Sequence: a vertical (distance 0) sumsub on each pair, then for each pair an
; SBUTTERFLY regroup followed by the final distance-0 HADAMARD step.
;-----------------------------------------------------------------------------
%macro HADAMARD2_2D 6-7 sumsub
    HADAMARD 0, sumsub, %1, %2, %5
    HADAMARD 0, sumsub, %3, %4, %5
    ; first pair
    SBUTTERFLY %6, %1, %2, %5
%ifnum %7
    HADAMARD 0, amax, %1, %2, %5, %7
%else
    HADAMARD 0, %7, %1, %2, %5
%endif
    ; second pair
    SBUTTERFLY %6, %3, %4, %5
%ifnum %7
    HADAMARD 0, amax, %3, %4, %5, %7
%else
    HADAMARD 0, %7, %3, %4, %5
%endif
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
374 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD4_2D a, b, c, d, tmp [, op = sumsub]
;   %1-%4  register numbers
;   %5     temporary, forwarded to HADAMARD2_2D
;   %6     final-stage operation, forwarded as HADAMARD2_2D's 7th argument
; Built from two HADAMARD2_2D passes: word/dword interleave ("wd") on
; (%1,%2,%3,%4) with the default operation, then dword/qword interleave
; ("dq") on (%1,%3,%2,%4) with %6.  The names of %2 and %3 are exchanged at
; the end via SWAP (defined outside this section).
;-----------------------------------------------------------------------------
%macro HADAMARD4_2D 5-6 sumsub
    HADAMARD2_2D %1, %2, %3, %4, %5, wd
    HADAMARD2_2D %1, %3, %2, %4, %5, dq, %6
    SWAP         %2, %3
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
380 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD4_2D_SSE a, b, c, d, tmp [, op = sumsub]
;   %1-%4  register numbers
;   %5     temporary, forwarded to HADAMARD / SBUTTERFLY / HADAMARD2_2D
;   %6     operation used by the two final HADAMARD steps
; Variant of HADAMARD4_2D that spells out the first stage inline and adds a
; qdq interleave before the final stage.  The per-line notes are the original
; authors' register-content annotations.
;-----------------------------------------------------------------------------
%macro HADAMARD4_2D_SSE 5-6 sumsub
    HADAMARD     0, sumsub, %1, %2, %5      ; 1st V row 0 + 1
    HADAMARD     0, sumsub, %3, %4, %5      ; 1st V row 2 + 3
    SBUTTERFLY   wd, %1, %2, %5             ; %1: m0 1+0 %2: m1 1+0
    SBUTTERFLY   wd, %3, %4, %5             ; %3: m0 3+2 %4: m1 3+2
    HADAMARD2_2D %1, %3, %2, %4, %5, dq
    SBUTTERFLY   qdq, %1, %2, %5
    HADAMARD     0, %6, %1, %2, %5          ; 2nd H m1/m0 row 0+1
    SBUTTERFLY   qdq, %3, %4, %5
    HADAMARD     0, %6, %3, %4, %5          ; 2nd H m1/m0 row 2+3
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
392 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; HADAMARD8_2D r0, r1, r2, r3, r4, r5, r6, r7, tmp [, op = sumsub]
;   %1-%8  register numbers
;   %9     temporary, forwarded to every HADAMARD2_2D call
;   %10    final-stage operation, forwarded to the two qdq passes
; Three layers of HADAMARD2_2D: "wd" interleave on each group of four, "dq"
; interleave on the regrouped fours, then "qdq" interleave across the two
; halves using operation %10.
; Unless %10 is literally "amax", the register names %2<->%5 and %4<->%7 are
; exchanged at the end (NOTE(review): presumably output order does not matter
; for amax -- confirm against the HADAMARD definition).
;-----------------------------------------------------------------------------
%macro HADAMARD8_2D 9-10 sumsub
    ; layer 1: word/dword interleave
    HADAMARD2_2D %1, %2, %3, %4, %9, wd
    HADAMARD2_2D %5, %6, %7, %8, %9, wd

    ; layer 2: dword/qword interleave
    HADAMARD2_2D %1, %3, %2, %4, %9, dq
    HADAMARD2_2D %5, %7, %6, %8, %9, dq

    ; layer 3: qword/dqword interleave with the caller-selected operation
    HADAMARD2_2D %1, %5, %3, %7, %9, qdq, %10
    HADAMARD2_2D %2, %6, %4, %8, %9, qdq, %10

%ifnidn %10, amax
    SWAP %2, %5
    SWAP %4, %7
%endif
%endmacro
405 | |
;-----------------------------------------------------------------------------
; SUMSUB2_AB a, b, out
;   %1  in: a          out: 2*a + b
;   %2  in: b          (only read)
;   %3  out: a - 2*b
; Arithmetic is on packed 16-bit words (paddw/psubw, wrapping).  Operands are
; used verbatim, so callers pass full operand text (e.g. m0 or a memory
; reference), not bare register numbers.  mova is defined outside this file
; section.
;-----------------------------------------------------------------------------
%macro SUMSUB2_AB 3
    mova    %3, %1          ; %3 = a
    paddw   %1, %1          ; %1 = 2a
    paddw   %1, %2          ; %1 = 2a + b
    psubw   %3, %2          ; %3 = a - b
    psubw   %3, %2          ; %3 = a - 2b
%endmacro
413 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; SUMSUB2_BA a, b, tmp
;   %1  in: a          out: a + 2*b
;   %2  in: b          out: b - 2*a
;   %3  scratch; holds a copy of the original a afterwards
; Arguments are register NUMBERS (the macro prepends "m" itself).  Arithmetic
; is on packed 16-bit words (paddw/psubw, wrapping).
;-----------------------------------------------------------------------------
%macro SUMSUB2_BA 3
    mova    m%3, m%1        ; keep a for the subtraction below
    paddw   m%1, m%2        ; a + b
    paddw   m%1, m%2        ; a + 2b
    psubw   m%2, m%3        ; b - a
    psubw   m%2, m%3        ; b - 2a
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
421 |
;-----------------------------------------------------------------------------
; SUMSUBD2_AB a, b, save_b, save_a
;   %1  in: a          out: (a >> 1) - b
;   %2  in: b          out: (b >> 1) + a
;   %3  receives a copy of the original b
;   %4  receives a copy of the original a
; Shifts are arithmetic on packed 16-bit words (psraw).  Operands are used
; verbatim; %3/%4 may be registers or memory references (IDCT4_1D passes
; both forms).
;-----------------------------------------------------------------------------
%macro SUMSUBD2_AB 4
    mova    %4, %1          ; %4 = a (unshifted copy)
    mova    %3, %2          ; %3 = b (unshifted copy)
    psraw   %2, 1           ; %2 = b >> 1
    psraw   %1, 1           ; %1 = a >> 1
    paddw   %2, %4          ; %2 = (b >> 1) + a
    psubw   %1, %3          ; %1 = (a >> 1) - b
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
430 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; DCT4_1D r0, r1, r2, r3, tmp
;   %1-%4  register numbers holding the four inputs
;   %5     either a register NUMBER (used as m%5 scratch) or an address
;          expression (used as [%5] to spill m%2 to memory)
; Both paths run SUMSUB_BADC, SUMSUB_BA and SUMSUB2_AB and finish with a SWAP
; that rotates the register names; the SWAP lists differ because the
; register path has one more register in play.  SUMSUB_BADC, SUMSUB_BA and
; SWAP are defined outside this section of the file.
;-----------------------------------------------------------------------------
%macro DCT4_1D 5
%ifnum %5
    ; scratch is a register
    SUMSUB_BADC m%4, m%1, m%3, m%2      ; optional temp argument m%5 left commented out
    SUMSUB_BA   m%3, m%4, m%5
    SUMSUB2_AB  m%1, m%2, m%5
    SWAP        %1, %3, %4, %5, %2
%else
    ; scratch is memory at [%5]
    SUMSUB_BADC m%4, m%1, m%3, m%2
    SUMSUB_BA   m%3, m%4
    mova        [%5], m%2               ; spill m%2 so it can receive the SUMSUB2_AB output
    SUMSUB2_AB  m%1, [%5], m%2
    SWAP        %1, %3, %4, %2
%endif
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
445 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; IDCT4_1D r0, r1, r2, r3, tmp [, tmp2]
;   %1-%4  register numbers holding the four inputs
;   %5     either a register NUMBER (m%5 scratch) or an address expression;
;          in the latter case [%5] and [%5+16] are used as scratch memory
;   %6     second scratch register number; only referenced on the register
;          path, so callers taking that path must supply it
; The "%n:" notes below are the original authors' annotations of register
; contents, expressed in terms of the macro's inputs.
;-----------------------------------------------------------------------------
%macro IDCT4_1D 5-6
%ifnum %5
    SUMSUBD2_AB m%2, m%4, m%6, m%5
    ; %2: %2>>1-%4 %4: %2+%4>>1
    SUMSUB_BA   m%3, m%1, m%6
    ; %3: %1+%3 %1: %1-%3
    SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
    ; %4: %1+%3 + (%2+%4>>1)
    ; %3: %1+%3 - (%2+%4>>1)
    ; %2: %1-%3 + (%2>>1-%4)
    ; %1: %1-%3 - (%2>>1-%4)
%else
    ; same sequence with the scratch values kept in memory
    SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
    SUMSUB_BA   m%3, m%1
    SUMSUB_BADC m%4, m%3, m%2, m%1
%endif
    SWAP        %1, %4, %3
    ; after renaming:
    ; %1: %1+%3 + (%2+%4>>1) row0
    ; %2: %1-%3 + (%2>>1-%4) row1
    ; %3: %1-%3 - (%2>>1-%4) row2
    ; %4: %1+%3 - (%2+%4>>1) row3
%endmacro
468 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
469 |
;-----------------------------------------------------------------------------
; LOAD_DIFF dst, tmp, zero, src_a, src_b
;   %1  destination register: word-wise (src_a - src_b) of the loaded bytes
;   %2  temporary register (clobbered)
;   %3  register to unpack against, or the literal token "none"
;       (NOTE(review): presumably an all-zero register -- confirm at callers)
;   %4  memory operand for the first source
;   %5  memory operand for the second source
; With "none", no zero register is needed: each word lane is built as
; (a | b<<8) - (b | b<<8), which equals a - b.
; movh is defined outside this section of the file.
;-----------------------------------------------------------------------------
%macro LOAD_DIFF 5
%ifidn %3, none
    movh        %1, %4
    movh        %2, %5
    punpcklbw   %1, %2          ; lanes: a | b<<8
    punpcklbw   %2, %2          ; lanes: b | b<<8
    psubw       %1, %2          ; lanes: a - b
%else
    movh        %1, %4
    punpcklbw   %1, %3          ; widen a using %3 as the high bytes
    movh        %2, %5
    punpcklbw   %2, %3          ; widen b the same way
    psubw       %1, %2
%endif
%endmacro
485 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; LOAD_DIFF8x4_SSE2 r0, r1, r2, r3, tmp, zero, ptr_enc, ptr_dec
;   %1-%4  destination register numbers; each number is ALSO used as the row
;          index multiplied by FENC_STRIDE / FDEC_STRIDE
;   %5     temporary register number
;   %6     register number passed as LOAD_DIFF's unpack operand
;   %7     base address, rows spaced FENC_STRIDE apart
;   %8     base address, rows spaced FDEC_STRIDE apart
; Expands to four LOAD_DIFF invocations, one per row.  FENC_STRIDE and
; FDEC_STRIDE are defined outside this section of the file.
;-----------------------------------------------------------------------------
%macro LOAD_DIFF8x4_SSE2 8
    LOAD_DIFF   m%1, m%5, m%6, [%7+%1*FENC_STRIDE], [%8+%1*FDEC_STRIDE]
    LOAD_DIFF   m%2, m%5, m%6, [%7+%2*FENC_STRIDE], [%8+%2*FDEC_STRIDE]
    LOAD_DIFF   m%3, m%5, m%6, [%7+%3*FENC_STRIDE], [%8+%3*FDEC_STRIDE]
    LOAD_DIFF   m%4, m%5, m%6, [%7+%4*FENC_STRIDE], [%8+%4*FDEC_STRIDE]
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
492 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; LOAD_DIFF8x4_SSSE3 r0, r1, r2, r3, tmp, mul, ptr_enc, ptr_dec
;   (original summary: 4x dst, 1x tmp, 1x mul, 2x ptr)
;   %1-%4  destination register numbers; each number is also the row index
;          scaled by FENC_STRIDE / FDEC_STRIDE
;   %5     temporary register number
;   %6     register number of the pmaddubsw multiplier
;          (NOTE(review): presumably alternating +1/-1 bytes so that each
;          word becomes enc - dec -- confirm at callers)
;   %7     base address, rows spaced FENC_STRIDE apart
;   %8     base address, rows spaced FDEC_STRIDE apart
; For each row the %8-side bytes are first loaded into the NEXT destination
; register (m%5 for the last row), interleaved into the current one, and
; only then is the next register overwritten with its own row.  The order of
; the loads therefore matters.
;-----------------------------------------------------------------------------
%macro LOAD_DIFF8x4_SSSE3 8 ; 4x dst, 1x tmp, 1x mul, 2x ptr
    ; row %1 (m%2 used as scratch)
    movh        m%2, [%8+%1*FDEC_STRIDE]
    movh        m%1, [%7+%1*FENC_STRIDE]
    punpcklbw   m%1, m%2
    ; row %2 (m%3 used as scratch)
    movh        m%3, [%8+%2*FDEC_STRIDE]
    movh        m%2, [%7+%2*FENC_STRIDE]
    punpcklbw   m%2, m%3
    ; row %3 (m%4 used as scratch)
    movh        m%4, [%8+%3*FDEC_STRIDE]
    movh        m%3, [%7+%3*FENC_STRIDE]
    punpcklbw   m%3, m%4
    ; row %4 (m%5 used as scratch)
    movh        m%5, [%8+%4*FDEC_STRIDE]
    movh        m%4, [%7+%4*FENC_STRIDE]
    punpcklbw   m%4, m%5
    ; combine each interleaved byte pair into one word using the multiplier
    pmaddubsw   m%1, m%6
    pmaddubsw   m%2, m%6
    pmaddubsw   m%3, m%6
    pmaddubsw   m%4, m%6
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
511 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; STORE_DCT r0, r1, r2, r3, base, offset
;   %1-%4  source register numbers
;   %5,%6  summed to form the destination address (%5 + %6 + displacement)
; Writes 64 bytes: the low 8 bytes of m%1..m%4 go to displacements 0..24
; (movq), then the high 8 bytes of the same registers go to displacements
; 32..56 (movhps).
;-----------------------------------------------------------------------------
%macro STORE_DCT 6
    ; low halves
    movq    [%5+%6+ 0], m%1
    movq    [%5+%6+ 8], m%2
    movq    [%5+%6+16], m%3
    movq    [%5+%6+24], m%4
    ; high halves
    movhps  [%5+%6+32], m%1
    movhps  [%5+%6+40], m%2
    movhps  [%5+%6+48], m%3
    movhps  [%5+%6+56], m%4
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
522 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; STORE_IDCT a, b, c, d
;   %1-%4  source registers, used verbatim (pass full register names)
; Stores eight rows relative to r0, at offsets -4*FDEC_STRIDE through
; +3*FDEC_STRIDE.  Each register supplies two consecutive rows: its high
; half (movhps) goes to the earlier row and its low half (movh) to the
; later one.  The destination pointer r0 is hard-coded.
;-----------------------------------------------------------------------------
%macro STORE_IDCT 4
    movhps  [r0-4*FDEC_STRIDE], %1
    movh    [r0-3*FDEC_STRIDE], %1

    movhps  [r0-2*FDEC_STRIDE], %2
    movh    [r0-1*FDEC_STRIDE], %2

    movhps  [r0+0*FDEC_STRIDE], %3
    movh    [r0+1*FDEC_STRIDE], %3

    movhps  [r0+2*FDEC_STRIDE], %4
    movh    [r0+3*FDEC_STRIDE], %4
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
533 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; LOAD_DIFF_8x4P r0, r1, r2, r3, tmp0, tmp1, zero [, ptr_a = r0 [, ptr_b = r2 [, advance = 0]]]
;   (original summary: 4x dest, 2x temp, 2x pointer, increment?)
;   %1-%4  destination register numbers, one per row
;   %5,%6  temporary register numbers, alternated between rows
;   %7     register number passed as LOAD_DIFF's unpack operand
;   %8     first source pointer  (default r0); rows at +0, +r1, +2*r1, +r4
;   %9     second source pointer (default r2); rows at +0, +r3, +2*r3, +r5
;   %10    if non-zero, both pointers are advanced by four rows afterwards
; NOTE(review): r4 / r5 are presumably preloaded with 3*r1 / 3*r3 by the
; caller -- confirm; this macro only reads them.
;-----------------------------------------------------------------------------
%macro LOAD_DIFF_8x4P 7-10 r0,r2,0 ; 4x dest, 2x temp, 2x pointer, increment?
    LOAD_DIFF   m%1, m%5, m%7, [%8],      [%9]
    LOAD_DIFF   m%2, m%6, m%7, [%8+r1],   [%9+r3]
    LOAD_DIFF   m%3, m%5, m%7, [%8+2*r1], [%9+2*r3]
    LOAD_DIFF   m%4, m%6, m%7, [%8+r4],   [%9+r5]
%if %10
    ; step both pointers past the four rows just consumed
    lea         %8, [%8+4*r1]
    lea         %9, [%9+4*r3]
%endif
%endmacro
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
544 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
;-----------------------------------------------------------------------------
; DIFFx2 res0, res1, tmp, zero, src0, src1 [, unused]
;   %1  in: 16-bit residual for src0   (shifted and summed in place)
;   %2  in: 16-bit residual for src1   out: packed bytes, %2's results in the
;       low half and %1's results in the high half
;   %3  temporary register (clobbered)
;   %4  register to unpack the source bytes against
;       (NOTE(review): presumably all-zero -- confirm at callers)
;   %5  memory operand read for the first source
;   %6  memory operand read for the second source
;   %7  accepted but not referenced in the body
; Each residual is arithmetically shifted right by 6, added to its widened
; source bytes with signed saturation, and the two results are packed to
; bytes with unsigned saturation.  Nothing is stored to memory here.
;-----------------------------------------------------------------------------
%macro DIFFx2 6-7
    ; first source / residual
    movh        %3, %5
    punpcklbw   %3, %4
    psraw       %1, 6
    paddsw      %1, %3
    ; second source / residual
    movh        %3, %6
    punpcklbw   %3, %4
    psraw       %2, 6
    paddsw      %2, %3
    ; merge both results into %2
    packuswb    %2, %1
%endmacro
556 | |
;-----------------------------------------------------------------------------
; STORE_DIFF res, tmp, zero, dst
;   %1  in: 16-bit residual (clobbered; ends up holding the packed bytes)
;   %2  temporary register (clobbered)
;   %3  register to unpack the destination bytes against
;       (NOTE(review): presumably all-zero -- confirm at callers)
;   %4  memory operand that is both read and written
; Computes dst = clip_to_u8(dst + (res >> 6)) and writes it back: the
; existing bytes are widened to words, the residual is arithmetically
; shifted right by 6 and added with signed saturation, and the sum is packed
; with unsigned saturation before being stored.
;-----------------------------------------------------------------------------
%macro STORE_DIFF 4
    movh        %2, %4          ; load current destination bytes
    punpcklbw   %2, %3          ; widen to words
    psraw       %1, 6           ; scale the residual
    paddsw      %1, %2          ; add prediction
    packuswb    %1, %1          ; clamp to 0..255 and narrow
    movh        %4, %1          ; write back
%endmacro
564 %endmacro |