Mercurial > libavcodec.hg
annotate x86/x86util.asm @ 12494:94eaea836bf4 libavcodec
Check avctx width/height more thoroughly (e.g. all values 0 except width would
have been accepted before).
Also do not fail if they are invalid but instead override them to 0.
This allows decoding e.g. MPEG video when only the container values are corrupted.
For encoding a value of 0,0 of course makes no sense, but was allowed
through before and will be caught by an extra check in the encode function.
author | reimar |
---|---|
date | Wed, 15 Sep 2010 04:46:55 +0000 |
parents | 846779f6b164 |
children | c997f09d1e10 |
rev | line source |
---|---|
8510 | 1 ;***************************************************************************** |
8804
ba83a0c57e9f
Fix wrong file name in header, noticed by David DeHaven, dave sagetv com.
diego
parents:
8510
diff
changeset
|
2 ;* x86util.asm |
8510 | 3 ;***************************************************************************** |
12005
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
4 ;* Copyright (C) 2008-2010 x264 project |
8510 | 5 ;* |
12005
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
6 ;* Authors: Loren Merritt <lorenm@u.washington.edu> |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
7 ;* Holger Lubitz <holger@lubitz.org> |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
8 ;* |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
9 ;* This file is part of FFmpeg. |
8510 | 10 ;* |
12005
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
11 ;* FFmpeg is free software; you can redistribute it and/or |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
12 ;* modify it under the terms of the GNU Lesser General Public |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
13 ;* License as published by the Free Software Foundation; either |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
14 ;* version 2.1 of the License, or (at your option) any later version. |
8510 | 15 ;* |
12005
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
16 ;* FFmpeg is distributed in the hope that it will be useful, |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
19 ;* Lesser General Public License for more details. |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
20 ;* |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
21 ;* You should have received a copy of the GNU Lesser General Public |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
22 ;* License along with FFmpeg; if not, write to the Free Software |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
88563eada57f
Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents:
11931
diff
changeset
|
24 ;****************************************************************************** |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
25 |
; SBUTTERFLY suffix, a, b, tmp   (a, b, tmp are register indices, used as m<N>)
; Copies m<a> to m<tmp>, then interleaves the low-order elements of (a, b) into
; m<a> (punpckl<suffix>) and the high-order elements of (a, b) into m<tmp>
; (punpckh<suffix>). The final SWAP exchanges the names b and tmp, so the
; high-half result is what later code refers to as m<b>.
; NOTE(review): SWAP and mova are defined outside this view (presumably the
; x264asm register-renaming / aligned-move helpers) -- confirm.
8510 | 26 %macro SBUTTERFLY 4 |
27 mova m%4, m%2 | |
28 punpckl%1 m%2, m%3 | |
29 punpckh%1 m%4, m%3 | |
30 SWAP %3, %4 | |
31 %endmacro | |
32 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
33 %macro SBUTTERFLY2 4 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
34 mova m%4, m%2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
35 punpckh%1 m%2, m%3 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
36 punpckl%1 m%4, m%3 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
37 SWAP %2, %4, %3 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
38 %endmacro |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
39 |
12086
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
40 %macro TRANSPOSE4x4B 5 |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
41 SBUTTERFLY bw, %1, %2, %5 |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
42 SBUTTERFLY bw, %3, %4, %5 |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
43 SBUTTERFLY wd, %1, %3, %5 |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
44 SBUTTERFLY wd, %2, %4, %5 |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
45 SWAP %2, %3 |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
46 %endmacro |
d780ae746855
Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents:
12013
diff
changeset
|
47 |
; TRANSPOSE4x4W a, b, c, d, tmp   (register indices)
; Runs two word-level (wd) SBUTTERFLY passes on the pairs (a,b) and (c,d),
; then two dword-level (dq) passes on (a,c) and (b,d), always with tmp as the
; scratch index, and finally exchanges the names of the 2nd and 3rd registers.
; As the name indicates, this is intended as a 4x4 transpose of 16-bit words.
8510 | 48 %macro TRANSPOSE4x4W 5 |
49 SBUTTERFLY wd, %1, %2, %5 | |
50 SBUTTERFLY wd, %3, %4, %5 | |
51 SBUTTERFLY dq, %1, %3, %5 | |
52 SBUTTERFLY dq, %2, %4, %5 | |
53 SWAP %2, %3 | |
54 %endmacro | |
55 | |
; TRANSPOSE2x4x4W a, b, c, d, tmp   (register indices)
; Same wd and dq SBUTTERFLY passes as TRANSPOSE4x4W, followed by two
; qword-level (qdq) passes on the pairs (a,b) and (c,d). Unlike
; TRANSPOSE4x4W there is no trailing SWAP of the 2nd and 3rd names.
; NOTE(review): the name suggests two 4x4 word blocks transposed side by side
; in full-width registers -- confirm against callers.
56 %macro TRANSPOSE2x4x4W 5 | |
57 SBUTTERFLY wd, %1, %2, %5 | |
58 SBUTTERFLY wd, %3, %4, %5 | |
59 SBUTTERFLY dq, %1, %3, %5 | |
60 SBUTTERFLY dq, %2, %4, %5 | |
61 SBUTTERFLY qdq, %1, %2, %5 | |
62 SBUTTERFLY qdq, %3, %4, %5 | |
63 %endmacro | |
64 | |
; TRANSPOSE4x4D a, b, c, d, tmp   (register indices)
; Dword-level (dq) SBUTTERFLY passes on (a,b) and (c,d), then qword-level
; (qdq) passes on (a,c) and (b,d), with tmp as scratch; finally the names of
; the 2nd and 3rd registers are exchanged. As the name indicates, this is
; intended as a 4x4 transpose of 32-bit elements.
65 %macro TRANSPOSE4x4D 5 | |
66 SBUTTERFLY dq, %1, %2, %5 | |
67 SBUTTERFLY dq, %3, %4, %5 | |
68 SBUTTERFLY qdq, %1, %3, %5 | |
69 SBUTTERFLY qdq, %2, %4, %5 | |
70 SWAP %2, %3 | |
71 %endmacro | |
72 | |
; TRANSPOSE8x8W r0..r7 (register indices), %9, [%10, [%11]]
; Three rounds of SBUTTERFLY (wd, dq, qdq) over eight registers, followed by
; two name swaps (%2<->%5, %4<->%7).
;  * ARCH_X86_64 branch: %9 is passed to SBUTTERFLY as the scratch register
;    index; %10/%11 are not referenced.
;  * other branch: no spare register is assumed, so %9 and %10 are used as
;    movdqa operands to spill/reload one register at a time, and the scratch
;    index handed to SBUTTERFLY is whichever register was just spilled.
;    When fewer than 11 arguments are given (%0<11) the macro itself stores
;    m%7 to %9 on entry and reloads m%5 from %10 on exit; otherwise the caller
;    handles those two transfers (see the in/out notes inside the branch).
; movdqa is used for the spills, so %9/%10 must be valid movdqa operands.
73 %macro TRANSPOSE8x8W 9-11 | |
74 %ifdef ARCH_X86_64 | |
75 SBUTTERFLY wd, %1, %2, %9 | |
76 SBUTTERFLY wd, %3, %4, %9 | |
77 SBUTTERFLY wd, %5, %6, %9 | |
78 SBUTTERFLY wd, %7, %8, %9 | |
79 SBUTTERFLY dq, %1, %3, %9 | |
80 SBUTTERFLY dq, %2, %4, %9 | |
81 SBUTTERFLY dq, %5, %7, %9 | |
82 SBUTTERFLY dq, %6, %8, %9 | |
83 SBUTTERFLY qdq, %1, %5, %9 | |
84 SBUTTERFLY qdq, %2, %6, %9 | |
85 SBUTTERFLY qdq, %3, %7, %9 | |
86 SBUTTERFLY qdq, %4, %8, %9 | |
87 SWAP %2, %5 | |
88 SWAP %4, %7 | |
89 %else | |
90 ; in: m0..m7, unless %11 in which case m6 is in %9 | |
91 ; out: m0..m7, unless %11 in which case m4 is in %10 | |
92 ; spills into %9 and %10 | |
93 %if %0<11 | |
94 movdqa %9, m%7 | |
95 %endif | |
96 SBUTTERFLY wd, %1, %2, %7 | |
97 movdqa %10, m%2 | |
98 movdqa m%7, %9 | |
99 SBUTTERFLY wd, %3, %4, %2 | |
100 SBUTTERFLY wd, %5, %6, %2 | |
101 SBUTTERFLY wd, %7, %8, %2 | |
102 SBUTTERFLY dq, %1, %3, %2 | |
103 movdqa %9, m%3 | |
104 movdqa m%2, %10 | |
105 SBUTTERFLY dq, %2, %4, %3 | |
106 SBUTTERFLY dq, %5, %7, %3 | |
107 SBUTTERFLY dq, %6, %8, %3 | |
108 SBUTTERFLY qdq, %1, %5, %3 | |
109 SBUTTERFLY qdq, %2, %6, %3 | |
110 movdqa %10, m%2 | |
111 movdqa m%3, %9 | |
112 SBUTTERFLY qdq, %3, %7, %2 | |
113 SBUTTERFLY qdq, %4, %8, %2 | |
114 SWAP %2, %5 | |
115 SWAP %4, %7 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
116 %if %0<11 |
8510 | 117 movdqa m%5, %10 |
118 %endif | |
119 %endif | |
120 %endmacro | |
121 | |
12144 | 122 ; PABSW macros assume %1 != %2, while ABS1/2 macros work in-place |
; PABSW_MMX dst, src   (dst must differ from src)
; Per-word absolute value without pabsw:
;   %1 = 0, then pcmpgtw sets each word of %1 to all-ones where 0 > src,
;   %2 = (%2 xor mask) - mask, which negates exactly the negative words.
; The result is computed in the src register (src is overwritten); the final
; SWAP exchanges the two names so that the result is addressed as dst.
; NOTE(review): SWAP is defined outside this view -- confirm it is the
; x264asm renaming macro and that callers pass renamable register names.
123 %macro PABSW_MMX 2 | |
124 pxor %1, %1 | |
125 pcmpgtw %1, %2 | |
126 pxor %2, %1 | |
127 psubw %2, %1 | |
128 SWAP %1, %2 | |
129 %endmacro | |
130 | |
; PSIGNW_MMX dst, mask
; Computes dst = (dst xor mask) - mask per 16-bit word.
; NOTE(review): this only matches a sign-application when each word of mask
; is 0 or -1 (all ones); presumably callers pass such a mask rather than
; arbitrary signed values as psignw would accept -- confirm at call sites.
131 %macro PSIGNW_MMX 2 | |
132 pxor %1, %2 | |
133 psubw %1, %2 | |
134 %endmacro | |
135 | |
; PABSW_MMX2 dst, src   (dst must differ from src)
; Per-word absolute value: dst = 0 - src, then dst = signed max(dst, src).
; src is left unmodified.
136 %macro PABSW_MMX2 2 | |
137 pxor %1, %1 | |
138 psubw %1, %2 | |
139 pmaxsw %1, %2 | |
140 %endmacro | |
141 | |
; PABSW_SSSE3 dst, src
; Thin wrapper: emits a single pabsw dst, src, giving the same dst/src
; interface as the PABSW_MMX / PABSW_MMX2 variants.
142 %macro PABSW_SSSE3 2 | |
143 pabsw %1, %2 | |
144 %endmacro | |
145 | |
; PSIGNW_SSSE3 dst, src
; Thin wrapper: emits a single psignw dst, src, giving the same two-operand
; interface as PSIGNW_MMX.
146 %macro PSIGNW_SSSE3 2 | |
147 psignw %1, %2 | |
148 %endmacro | |
149 | |
; ABS1_MMX a, tmp
; In-place per-word absolute value of a: tmp = 0 - a, a = signed max(a, tmp).
; tmp is clobbered (it ends up holding -a).
8510 | 150 %macro ABS1_MMX 2 ; a, tmp |
151 pxor %2, %2 | |
152 psubw %2, %1 | |
153 pmaxsw %1, %2 | |
154 %endmacro | |
155 | |
; ABS2_MMX a, b, tmp0, tmp1
; Two interleaved copies of the ABS1_MMX sequence: a = |a| using tmp0 and
; b = |b| using tmp1 (per 16-bit word). Both temporaries are clobbered.
156 %macro ABS2_MMX 4 ; a, b, tmp0, tmp1 | |
157 pxor %3, %3 | |
158 pxor %4, %4 | |
159 psubw %3, %1 | |
160 psubw %4, %2 | |
161 pmaxsw %1, %3 | |
162 pmaxsw %2, %4 | |
163 %endmacro | |
164 | |
; ABS1_SSSE3 a, tmp
; In-place per-word absolute value via pabsw. The second argument is accepted
; for interface parity with ABS1_MMX but is not referenced.
165 %macro ABS1_SSSE3 2 | |
166 pabsw %1, %1 | |
167 %endmacro | |
168 | |
; ABS2_SSSE3 a, b, tmp0, tmp1
; In-place per-word absolute value of a and b via pabsw. The third and fourth
; arguments are accepted for interface parity with ABS2_MMX but are not
; referenced.
169 %macro ABS2_SSSE3 4 | |
170 pabsw %1, %1 | |
171 pabsw %2, %2 | |
172 %endmacro | |
173 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
174 %macro ABSB_MMX 2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
175 pxor %2, %2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
176 psubb %2, %1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
177 pminub %1, %2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
178 %endmacro |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
179 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
180 %macro ABSB2_MMX 4 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
181 pxor %3, %3 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
182 pxor %4, %4 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
183 psubb %3, %1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
184 psubb %4, %2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
185 pminub %1, %3 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
186 pminub %2, %4 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
187 %endmacro |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
188 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
189 %macro ABSB_SSSE3 2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
190 pabsb %1, %1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
191 %endmacro |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
192 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
193 %macro ABSB2_SSSE3 4 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
194 pabsb %1, %1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
195 pabsb %2, %2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
196 %endmacro |
8510 | 197 |
; ABS4 a, b, c, d, tmp0, tmp1
; Applies ABS2 to (a, b) and then to (c, d), reusing tmp0/tmp1 for both calls.
; ABS2 is resolved at expansion time through the %define in this file
; (ABS2_MMX by default), so the variant used depends on what ABS2 is defined
; as when ABS4 is invoked.
198 %macro ABS4 6 | |
199 ABS2 %1, %2, %5, %6 | |
200 ABS2 %3, %4, %5, %6 | |
201 %endmacro | |
202 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
203 %define ABS1 ABS1_MMX |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
204 %define ABS2 ABS2_MMX |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
205 %define ABSB ABSB_MMX |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
206 %define ABSB2 ABSB2_MMX |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
207 |
; SPLATB_MMX dst, addr, unused
; Broadcasts the byte at [addr] to every byte of dst.
;   movd loads the 4 bytes at addr-3 .. addr, so the wanted byte is the top
;   byte of the loaded dword; punpcklbw dst,dst doubles each byte, which puts
;   the wanted byte in both halves of word 3; the 0xff shuffle replicates
;   word 3 across the low four words; for 16-byte registers punpcklqdq then
;   copies the low qword into the high qword.
; Note that 3 bytes before addr are read. The third argument is not
; referenced (it exists for parity with SPLATB_SSSE3).
8510 | 208 %macro SPLATB_MMX 3 |
209 movd %1, [%2-3] ;to avoid crossing a cacheline | |
210 punpcklbw %1, %1 | |
211 %if mmsize==16 | |
212 pshuflw %1, %1, 0xff | |
213 punpcklqdq %1, %1 | |
214 %else | |
215 pshufw %1, %1, 0xff | |
216 %endif | |
217 %endmacro | |
218 | |
; SPLATB_SSSE3 dst, addr, shuffle
; Loads the 4 bytes at addr-3 .. addr into dst and permutes them with pshufb
; using the third argument as the shuffle control.
; NOTE(review): for this to broadcast the byte at [addr], the control must
; select byte index 3 in every lane -- presumably callers pass such a
; constant; confirm at call sites.
219 %macro SPLATB_SSSE3 3 | |
220 movd %1, [%2-3] | |
221 pshufb %1, %3 | |
222 %endmacro | |
223 | |
; PALIGNR_MMX dst, src, imm, tmp
; Emulates palignr dst, src, imm with shifts and an OR:
;   tmp = src (the copy is skipped when tmp and src are textually identical,
;   in which case src itself is shifted and therefore clobbered),
;   dst is shifted left by (register size - imm) bytes,
;   tmp is shifted right by imm bytes,
;   dst |= tmp.
; For 8-byte registers (mmsize == 8) psllq/psrlq are used, so the byte counts
; are multiplied by 8 to give bit counts; otherwise the byte-granular
; pslldq/psrldq are used directly.
224 %macro PALIGNR_MMX 4 | |
225 %ifnidn %4, %2 | |
226 mova %4, %2 | |
227 %endif | |
228 %if mmsize == 8 | |
229 psllq %1, (8-%3)*8 | |
230 psrlq %4, %3*8 | |
231 %else | |
232 pslldq %1, 16-%3 | |
233 psrldq %4, %3 | |
234 %endif | |
235 por %1, %4 | |
236 %endmacro | |
237 | |
; PALIGNR_SSSE3 dst, src, imm, tmp
; Thin wrapper around the native palignr instruction. The fourth argument is
; accepted for interface parity with PALIGNR_MMX but is not referenced.
238 %macro PALIGNR_SSSE3 4 | |
239 palignr %1, %2, %3 | |
240 %endmacro | |
241 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
242 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
243 %ifnum %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
244 mova m%1, m%5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
245 mova m%3, m%5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
246 %else |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
247 mova m%1, %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
248 mova m%3, m%1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
249 %endif |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
250 pand m%1, m%2 ; dst .. y6 .. y4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
251 pand m%3, m%4 ; src .. y6 .. y4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
252 psrlw m%2, 8 ; dst .. y7 .. y5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
253 psrlw m%4, 8 ; src .. y7 .. y5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
254 %endmacro |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
255 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
256 %macro SUMSUB_BA 2-3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
257 %if %0==2 |
8510 | 258 paddw %1, %2 |
259 paddw %2, %2 | |
260 psubw %2, %1 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
261 %else |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
262 mova %3, %1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
263 paddw %1, %2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
264 psubw %2, %3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
265 %endif |
8510 | 266 %endmacro |
267 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
268 %macro SUMSUB_BADC 4-5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
269 %if %0==5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
270 SUMSUB_BA %1, %2, %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
271 SUMSUB_BA %3, %4, %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
272 %else |
8510 | 273 paddw %1, %2 |
274 paddw %3, %4 | |
275 paddw %2, %2 | |
276 paddw %4, %4 | |
277 psubw %2, %1 | |
278 psubw %4, %3 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
279 %endif |
8510 | 280 %endmacro |
281 | |
; SUMSUB2_AB a, b, out
; Per 16-bit word, with a and b denoting the input values:
;   a   <- 2*a + b
;   out <- a - 2*b
; b is only read (it may therefore be a memory operand, as DCT4_1D does in
; this file); out is overwritten with a copy of a before a is modified.
282 %macro SUMSUB2_AB 3 | |
283 mova %3, %1 | |
284 paddw %1, %1 | |
285 paddw %1, %2 | |
286 psubw %3, %2 | |
287 psubw %3, %2 | |
288 %endmacro | |
289 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
290 %macro SUMSUB2_BA 3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
291 mova m%3, m%1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
292 paddw m%1, m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
293 paddw m%1, m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
294 psubw m%2, m%3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
295 psubw m%2, m%3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
296 %endmacro |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
297 |
8510 | 298 %macro SUMSUBD2_AB 4 |
299 mova %4, %1 | |
300 mova %3, %2 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
301 psraw %2, 1 ; %2: %2>>1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
302 psraw %1, 1 ; %1: %1>>1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
303 paddw %2, %4 ; %2: %2>>1+%1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
304 psubw %1, %3 ; %1: %1>>1-%2 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
305 %endmacro |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
306 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
307 %macro DCT4_1D 5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
308 %ifnum %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
309 SUMSUB_BADC m%4, m%1, m%3, m%2; m%5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
310 SUMSUB_BA m%3, m%4, m%5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
311 SUMSUB2_AB m%1, m%2, m%5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
312 SWAP %1, %3, %4, %5, %2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
313 %else |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
314 SUMSUB_BADC m%4, m%1, m%3, m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
315 SUMSUB_BA m%3, m%4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
316 mova [%5], m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
317 SUMSUB2_AB m%1, [%5], m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
318 SWAP %1, %3, %4, %2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
319 %endif |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
320 %endmacro |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
321 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
322 %macro IDCT4_1D 5-6 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
323 %ifnum %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
324 SUMSUBD2_AB m%2, m%4, m%6, m%5 |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
325 ; %2: %2>>1-%4 %4: %2+%4>>1 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
326 SUMSUB_BA m%3, m%1, m%6 |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
327 ; %3: %1+%3 %1: %1-%3 |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
328 SUMSUB_BADC m%4, m%3, m%2, m%1, m%6 |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
329 ; %4: %1+%3 + (%2+%4>>1) |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
330 ; %3: %1+%3 - (%2+%4>>1) |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
331 ; %2: %1-%3 + (%2>>1-%4) |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
332 ; %1: %1-%3 - (%2>>1-%4) |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
333 %else |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
334 SUMSUBD2_AB m%2, m%4, [%5], [%5+16] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
335 SUMSUB_BA m%3, m%1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
336 SUMSUB_BADC m%4, m%3, m%2, m%1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
337 %endif |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
338 SWAP %1, %4, %3 |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
339 ; %1: %1+%3 + (%2+%4>>1) row0 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
340 ; %2: %1-%3 + (%2>>1-%4) row1 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
341 ; %3: %1-%3 - (%2>>1-%4) row2 |
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
342 ; %4: %1+%3 - (%2+%4>>1) row3 |
8510 | 343 %endmacro |
344 | |
11931
980030a3e315
Update x264asm header files to latest versions.
darkshikari
parents:
10019
diff
changeset
|
345 |
; LOAD_DIFF dst, tmp, zero|none, src1, src2
; Loads bytes from src1 and src2 (via movh), widens them to 16-bit words and
; leaves src1 - src2 in dst; tmp is clobbered.
;  * third argument is the literal token `none`: no zero register is needed.
;    dst is interleaved with tmp so each word is a + 256*b, tmp is interleaved
;    with itself so each word is b + 256*b, and the word subtraction cancels
;    the high bytes, leaving a - b.
;  * otherwise: both inputs are interleaved with the third argument before
;    subtracting (assumes that register holds zero -- TODO confirm at call
;    sites).
; NOTE(review): movh is defined outside this view (presumably the x264asm
; half-register load) -- confirm.
8510 | 346 %macro LOAD_DIFF 5 |
347 %ifidn %3, none | |
348 movh %1, %4 | |
349 movh %2, %5 | |
350 punpcklbw %1, %2 | |
351 punpcklbw %2, %2 | |
352 psubw %1, %2 | |
353 %else | |
354 movh %1, %4 | |
355 punpcklbw %1, %3 | |
356 movh %2, %5 | |
357 punpcklbw %2, %3 | |
358 psubw %1, %2 | |
359 %endif | |
360 %endmacro | |
361 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
362 %macro STORE_DCT 6 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
363 movq [%5+%6+ 0], m%1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
364 movq [%5+%6+ 8], m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
365 movq [%5+%6+16], m%3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
366 movq [%5+%6+24], m%4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
367 movhps [%5+%6+32], m%1 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
368 movhps [%5+%6+40], m%2 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
369 movhps [%5+%6+48], m%3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
370 movhps [%5+%6+56], m%4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
371 %endmacro |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
372 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
373 %macro LOAD_DIFF_8x4P 7-10 r0,r2,0 ; 4x dest, 2x temp, 2x pointer, increment? |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
374 LOAD_DIFF m%1, m%5, m%7, [%8], [%9] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
375 LOAD_DIFF m%2, m%6, m%7, [%8+r1], [%9+r3] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
376 LOAD_DIFF m%3, m%5, m%7, [%8+2*r1], [%9+2*r3] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
377 LOAD_DIFF m%4, m%6, m%7, [%8+r4], [%9+r5] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
378 %if %10 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
379 lea %8, [%8+4*r1] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
380 lea %9, [%9+4*r3] |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
381 %endif |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
382 %endmacro |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
383 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
384 %macro DIFFx2 6-7 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
385 movh %3, %5 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
386 punpcklbw %3, %4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
387 psraw %1, 6 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
388 paddsw %1, %3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
389 movh %3, %6 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
390 punpcklbw %3, %4 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
391 psraw %2, 6 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
392 paddsw %2, %3 |
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
393 packuswb %2, %1 |
8510 | 394 %endmacro |
395 | |
; STORE_DIFF diff, tmp, zero, dst
; Adds a word-valued residual to the bytes at dst and stores the result back:
;   tmp  = bytes loaded from dst, interleaved with the third argument
;          (assumes that register holds zero -- TODO confirm at call sites),
;   diff = (diff >> 6, arithmetic) + tmp, with signed saturation (paddsw),
;   diff is packed to bytes with unsigned saturation (packuswb) and its low
;   half is written to dst.
; Both diff and tmp are clobbered. The shift amount 6 is hard-coded here
; (STORE_DIFFx2 in this file takes it as a parameter instead).
396 %macro STORE_DIFF 4 | |
397 movh %2, %4 | |
398 punpcklbw %2, %3 | |
10019
c08ca946c80a
Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents:
8804
diff
changeset
|
399 psraw %1, 6 |
8510 | 400 paddsw %1, %2 |
401 packuswb %1, %1 | |
402 movh %4, %1 | |
403 %endmacro | |
12013 | 404 |
; STORE_DIFFx2 add1, add2, reg1, reg2, zero, shift, source, stride
; Two-row variant of STORE_DIFF: rows are read from [source] and
; [source+stride], widened by interleaving with `zero`, the residuals
; add1/add2 are arithmetically shifted right by `shift` and added, and the
; results are packed with unsigned saturation and written back to the same
; two addresses.
; Differences from STORE_DIFF visible here: the shift is a parameter, the
; addition is the wrapping paddw rather than the saturating paddsw, and the
; pack uses `zero` as its second operand.
; add1, add2, reg1 and reg2 are all clobbered.
405 %macro STORE_DIFFx2 8 ; add1, add2, reg1, reg2, zero, shift, source, stride | |
406 movh %3, [%7] | |
407 movh %4, [%7+%8] | |
408 punpcklbw %3, %5 | |
409 punpcklbw %4, %5 | |
410 psraw %1, %6 | |
411 psraw %2, %6 | |
412 paddw %3, %1 | |
413 paddw %4, %2 | |
414 packuswb %3, %5 | |
415 packuswb %4, %5 | |
416 movh [%7], %3 | |
417 movh [%7+%8], %4 | |
418 %endmacro |