annotate x86/x86util.asm @ 12483:0159a19bfff7 libavcodec

aacdec: Rework channel mapping compatibility hacks. For a PCE based configuration map the channels solely based on tags. For an indexed configuration map the channels solely based on position. This works with all known exotic samples including al17, elem_id0, bad_concat, and lfe_is_sce.
author alexc
date Fri, 10 Sep 2010 18:01:48 +0000
parents 846779f6b164
children c997f09d1e10
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
1 ;*****************************************************************************
8804
ba83a0c57e9f Fix wrong file name in header, noticed by David DeHaven, dave sagetv com.
diego
parents: 8510
diff changeset
2 ;* x86util.asm
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
3 ;*****************************************************************************
12005
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
4 ;* Copyright (C) 2008-2010 x264 project
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
5 ;*
12005
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
6 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
7 ;* Holger Lubitz <holger@lubitz.org>
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
8 ;*
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
9 ;* This file is part of FFmpeg.
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
10 ;*
12005
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
11 ;* FFmpeg is free software; you can redistribute it and/or
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
12 ;* modify it under the terms of the GNU Lesser General Public
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
13 ;* License as published by the Free Software Foundation; either
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
14 ;* version 2.1 of the License, or (at your option) any later version.
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
15 ;*
12005
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
16 ;* FFmpeg is distributed in the hope that it will be useful,
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
19 ;* Lesser General Public License for more details.
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
20 ;*
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
21 ;* You should have received a copy of the GNU Lesser General Public
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
22 ;* License along with FFmpeg; if not, write to the Free Software
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
88563eada57f Make x86util.asm LGPL so we can use it in LGPL asm
darkshikari
parents: 11931
diff changeset
24 ;******************************************************************************
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
25
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
26 %macro SBUTTERFLY 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
27 mova m%4, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
28 punpckl%1 m%2, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
29 punpckh%1 m%4, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
30 SWAP %3, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
31 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
32
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
33 %macro SBUTTERFLY2 4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
34 mova m%4, m%2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
35 punpckh%1 m%2, m%3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
36 punpckl%1 m%4, m%3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
37 SWAP %2, %4, %3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
38 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
39
12086
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
40 %macro TRANSPOSE4x4B 5
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
41 SBUTTERFLY bw, %1, %2, %5
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
42 SBUTTERFLY bw, %3, %4, %5
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
43 SBUTTERFLY wd, %1, %3, %5
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
44 SBUTTERFLY wd, %2, %4, %5
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
45 SWAP %2, %3
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
46 %endmacro
d780ae746855 Simple H/V loopfilter for VP8 in MMX, MMX2 and SSE2 (yay for yasm macros).
rbultje
parents: 12013
diff changeset
47
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
48 %macro TRANSPOSE4x4W 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
49 SBUTTERFLY wd, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
50 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
51 SBUTTERFLY dq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
52 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
53 SWAP %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
54 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
55
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
56 %macro TRANSPOSE2x4x4W 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
57 SBUTTERFLY wd, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
58 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
59 SBUTTERFLY dq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
60 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
61 SBUTTERFLY qdq, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
62 SBUTTERFLY qdq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
63 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
64
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
65 %macro TRANSPOSE4x4D 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
66 SBUTTERFLY dq, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
67 SBUTTERFLY dq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
68 SBUTTERFLY qdq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
69 SBUTTERFLY qdq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
70 SWAP %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
71 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
72
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
73 %macro TRANSPOSE8x8W 9-11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
74 %ifdef ARCH_X86_64
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
75 SBUTTERFLY wd, %1, %2, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
76 SBUTTERFLY wd, %3, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
77 SBUTTERFLY wd, %5, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
78 SBUTTERFLY wd, %7, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
79 SBUTTERFLY dq, %1, %3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
80 SBUTTERFLY dq, %2, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
81 SBUTTERFLY dq, %5, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
82 SBUTTERFLY dq, %6, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
83 SBUTTERFLY qdq, %1, %5, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
84 SBUTTERFLY qdq, %2, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
85 SBUTTERFLY qdq, %3, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
86 SBUTTERFLY qdq, %4, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
87 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
88 SWAP %4, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
89 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
90 ; in: m0..m7, unless %11 in which case m6 is in %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
91 ; out: m0..m7, unless %11 in which case m4 is in %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
92 ; spills into %9 and %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
93 %if %0<11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
94 movdqa %9, m%7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
95 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
96 SBUTTERFLY wd, %1, %2, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
97 movdqa %10, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
98 movdqa m%7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
99 SBUTTERFLY wd, %3, %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
100 SBUTTERFLY wd, %5, %6, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
101 SBUTTERFLY wd, %7, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
102 SBUTTERFLY dq, %1, %3, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
103 movdqa %9, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
104 movdqa m%2, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
105 SBUTTERFLY dq, %2, %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
106 SBUTTERFLY dq, %5, %7, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
107 SBUTTERFLY dq, %6, %8, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
108 SBUTTERFLY qdq, %1, %5, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
109 SBUTTERFLY qdq, %2, %6, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
110 movdqa %10, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
111 movdqa m%3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
112 SBUTTERFLY qdq, %3, %7, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
113 SBUTTERFLY qdq, %4, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
114 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
115 SWAP %4, %7
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
116 %if %0<11
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
117 movdqa m%5, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
118 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
119 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
120 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
121
12144
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
122 ; PABSW macros assume %1 != %2, while ABS1/2 macros work in-place
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
123 %macro PABSW_MMX 2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
124 pxor %1, %1
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
125 pcmpgtw %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
126 pxor %2, %1
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
127 psubw %2, %1
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
128 SWAP %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
129 %endmacro
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
130
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
131 %macro PSIGNW_MMX 2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
132 pxor %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
133 psubw %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
134 %endmacro
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
135
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
136 %macro PABSW_MMX2 2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
137 pxor %1, %1
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
138 psubw %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
139 pmaxsw %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
140 %endmacro
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
141
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
142 %macro PABSW_SSSE3 2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
143 pabsw %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
144 %endmacro
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
145
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
146 %macro PSIGNW_SSSE3 2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
147 psignw %1, %2
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
148 %endmacro
846779f6b164 MMX/SSE VC1 loop filter
conrad
parents: 12086
diff changeset
149
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
150 %macro ABS1_MMX 2 ; a, tmp
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
151 pxor %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
152 psubw %2, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
153 pmaxsw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
154 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
155
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
156 %macro ABS2_MMX 4 ; a, b, tmp0, tmp1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
157 pxor %3, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
158 pxor %4, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
159 psubw %3, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
160 psubw %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
161 pmaxsw %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
162 pmaxsw %2, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
163 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
164
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
165 %macro ABS1_SSSE3 2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
166 pabsw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
167 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
168
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
169 %macro ABS2_SSSE3 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
170 pabsw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
171 pabsw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
172 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
173
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
174 %macro ABSB_MMX 2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
175 pxor %2, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
176 psubb %2, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
177 pminub %1, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
178 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
179
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
180 %macro ABSB2_MMX 4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
181 pxor %3, %3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
182 pxor %4, %4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
183 psubb %3, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
184 psubb %4, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
185 pminub %1, %3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
186 pminub %2, %4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
187 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
188
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
189 %macro ABSB_SSSE3 2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
190 pabsb %1, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
191 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
192
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
193 %macro ABSB2_SSSE3 4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
194 pabsb %1, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
195 pabsb %2, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
196 %endmacro
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
197
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
198 %macro ABS4 6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
199 ABS2 %1, %2, %5, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
200 ABS2 %3, %4, %5, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
201 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
202
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
203 %define ABS1 ABS1_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
204 %define ABS2 ABS2_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
205 %define ABSB ABSB_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
206 %define ABSB2 ABSB2_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
207
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
208 %macro SPLATB_MMX 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
209 movd %1, [%2-3] ;to avoid crossing a cacheline
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
210 punpcklbw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
211 %if mmsize==16
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
212 pshuflw %1, %1, 0xff
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
213 punpcklqdq %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
214 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
215 pshufw %1, %1, 0xff
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
216 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
217 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
218
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
219 %macro SPLATB_SSSE3 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
220 movd %1, [%2-3]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
221 pshufb %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
222 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
223
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
224 %macro PALIGNR_MMX 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
225 %ifnidn %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
226 mova %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
227 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
228 %if mmsize == 8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
229 psllq %1, (8-%3)*8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
230 psrlq %4, %3*8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
231 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
232 pslldq %1, 16-%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
233 psrldq %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
234 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
235 por %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
236 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
237
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
238 %macro PALIGNR_SSSE3 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
239 palignr %1, %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
240 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
241
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
242 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
243 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
244 mova m%1, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
245 mova m%3, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
246 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
247 mova m%1, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
248 mova m%3, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
249 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
250 pand m%1, m%2 ; dst .. y6 .. y4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
251 pand m%3, m%4 ; src .. y6 .. y4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
252 psrlw m%2, 8 ; dst .. y7 .. y5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
253 psrlw m%4, 8 ; src .. y7 .. y5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
254 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
255
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
256 %macro SUMSUB_BA 2-3 ; a, b [, tmp]: on exit %1 = a+b, %2 = b-a (packed 16-bit words)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
257 %if %0==2
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
258 paddw %1, %2 ; %1 = a+b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
259 paddw %2, %2 ; %2 = 2*b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
260 psubw %2, %1 ; %2 = 2*b-(a+b) = b-a, no temp needed
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
261 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
262 mova %3, %1 ; %3 = copy of a (tmp is clobbered)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
263 paddw %1, %2 ; %1 = a+b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
264 psubw %2, %3 ; %2 = b-a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
265 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
266 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
267
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
268 %macro SUMSUB_BADC 4-5 ; a, b, c, d [, tmp]: two butterflies; %1 = a+b, %2 = b-a, %3 = c+d, %4 = d-c
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
269 %if %0==5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
270 SUMSUB_BA %1, %2, %5 ; first pair, using %5 as temp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
271 SUMSUB_BA %3, %4, %5 ; second pair reuses the same temp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
272 %else
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
273 paddw %1, %2 ; %1 = a+b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
274 paddw %3, %4 ; %3 = c+d
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
275 paddw %2, %2 ; %2 = 2*b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
276 paddw %4, %4 ; %4 = 2*d
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
277 psubw %2, %1 ; %2 = b-a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
278 psubw %4, %3 ; %4 = d-c
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
279 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
280 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
281
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
282 %macro SUMSUB2_AB 3 ; a, b, out: on exit %1 = 2*a+b, %3 = a-2*b; %2 is only read (DCT4_1D passes a memory operand for it)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
283 mova %3, %1 ; %3 = copy of a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
284 paddw %1, %1 ; %1 = 2*a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
285 paddw %1, %2 ; %1 = 2*a+b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
286 psubw %3, %2 ; %3 = a-b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
287 psubw %3, %2 ; %3 = a-2*b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
288 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
289
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
290 %macro SUMSUB2_BA 3 ; register numbers a, b, tmp: on exit m%1 = a+2*b, m%2 = b-2*a, m%3 = original a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
291 mova m%3, m%1 ; m%3 = copy of a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
292 paddw m%1, m%2 ; m%1 = a+b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
293 paddw m%1, m%2 ; m%1 = a+2*b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
294 psubw m%2, m%3 ; m%2 = b-a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
295 psubw m%2, m%3 ; m%2 = b-2*a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
296 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
297
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
298 %macro SUMSUBD2_AB 4 ; a, b, tmp_b, tmp_a: on exit %1 = (a>>1)-b, %2 = a+(b>>1); %3/%4 are overwritten (IDCT4_1D also passes memory for them)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
299 mova %4, %1 ; %4 = copy of a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
300 mova %3, %2 ; %3 = copy of b
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
301 psraw %2, 1 ; %2: %2>>1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
302 psraw %1, 1 ; %1: %1>>1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
303 paddw %2, %4 ; %2: %2>>1+%1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
304 psubw %1, %3 ; %1: %1>>1-%2
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
305 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
306
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
307 %macro DCT4_1D 5 ; %1-%4: register numbers of the four inputs; %5: temp register number, or (if not numeric) an address used as scratch memory
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
308 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
309 SUMSUB_BADC m%4, m%1, m%3, m%2; m%5 (temp is commented out, so the 4-operand form expands here)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
310 SUMSUB_BA m%3, m%4, m%5 ; m%3 = m%3+m%4, m%4 = m%4-m%3, m%5 clobbered as temp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
311 SUMSUB2_AB m%1, m%2, m%5 ; m%1 = 2*m%1+m%2, m%5 = m%1-2*m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
312 SWAP %1, %3, %4, %5, %2 ; SWAP is defined outside this chunk; presumably renames registers so outputs land in %1-%4 - confirm
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
313 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
314 SUMSUB_BADC m%4, m%1, m%3, m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
315 SUMSUB_BA m%3, m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
316 mova [%5], m%2 ; spill m%2 to memory so the register can receive the second output below
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
317 SUMSUB2_AB m%1, [%5], m%2 ; m%1 = 2*m%1+[%5], m%2 = m%1-2*[%5]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
318 SWAP %1, %3, %4, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
319 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
320 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
321
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
322 %macro IDCT4_1D 5-6 ; %1-%4: register numbers of the four inputs; then either two temp register numbers (%5, %6) or an address %5 with scratch at [%5] and [%5+16]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
323 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
324 SUMSUBD2_AB m%2, m%4, m%6, m%5
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
325 ; %2: %2>>1-%4 %4: %2+%4>>1
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
326 SUMSUB_BA m%3, m%1, m%6
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
327 ; %3: %1+%3 %1: %1-%3
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
328 SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
329 ; %4: %1+%3 + (%2+%4>>1)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
330 ; %3: %1+%3 - (%2+%4>>1)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
331 ; %2: %1-%3 + (%2>>1-%4)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
332 ; %1: %1-%3 - (%2>>1-%4)
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
333 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
334 SUMSUBD2_AB m%2, m%4, [%5], [%5+16] ; same step as the register path, with both temporaries in memory
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
335 SUMSUB_BA m%3, m%1 ; temp-free 2-operand form
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
336 SUMSUB_BADC m%4, m%3, m%2, m%1 ; temp-free 4-operand form
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
337 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
338 SWAP %1, %4, %3
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
339 ; %1: %1+%3 + (%2+%4>>1) row0
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
340 ; %2: %1-%3 + (%2>>1-%4) row1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
341 ; %3: %1-%3 - (%2>>1-%4) row2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
342 ; %4: %1+%3 - (%2+%4>>1) row3
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
343 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
344
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
345
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
346 %macro LOAD_DIFF 5 ; dst, tmp, unpack reg or the token "none", src1, src2: %1 = bytes of %4 minus bytes of %5, as 16-bit words; %2 clobbered
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
347 %ifidn %3, none
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
348 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
349 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
350 punpcklbw %1, %2 ; each word: low byte = src1, high byte = src2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
351 punpcklbw %2, %2 ; each word: low byte = src2, high byte = src2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
352 psubw %1, %2 ; identical high bytes cancel, leaving src1-src2 without a zero register
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
353 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
354 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
355 punpcklbw %1, %3 ; %3 supplies the high bytes (presumably a zeroed register - confirm at call sites)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
356 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
357 punpcklbw %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
358 psubw %1, %2 ; %1 = src1-src2 per word
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
359 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
360 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
361
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
362 %macro STORE_DCT 6 ; %1-%4: register numbers to store; %5+%6: base address of the 64-byte destination
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
363 movq [%5+%6+ 0], m%1 ; low 8 bytes of each register go to offsets 0..24
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
364 movq [%5+%6+ 8], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
365 movq [%5+%6+16], m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
366 movq [%5+%6+24], m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
367 movhps [%5+%6+32], m%1 ; high 8 bytes of each register go to offsets 32..56
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
368 movhps [%5+%6+40], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
369 movhps [%5+%6+48], m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
370 movhps [%5+%6+56], m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
371 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
372
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
373 %macro LOAD_DIFF_8x4P 7-10 r0,r2,0 ; 4x dest, 2x temp, 1x unpack reg (%7), 2x pointer (default r0, r2), increment? (default 0); all registers given as numbers
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
374 LOAD_DIFF m%1, m%5, m%7, [%8], [%9] ; row 0; the two temps alternate between rows
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
375 LOAD_DIFF m%2, m%6, m%7, [%8+r1], [%9+r3] ; row 1: r1 / r3 are the row offsets of the two sources
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
376 LOAD_DIFF m%3, m%5, m%7, [%8+2*r1], [%9+2*r3] ; row 2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
377 LOAD_DIFF m%4, m%6, m%7, [%8+r4], [%9+r5] ; row 3: presumably r4 = 3*r1 and r5 = 3*r3, set up by the caller - TODO confirm
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
378 %if %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
379 lea %8, [%8+4*r1] ; advance the first pointer past the four rows just read
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
380 lea %9, [%9+4*r3] ; advance the second pointer likewise
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
381 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
382 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
383
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
384 %macro DIFFx2 6-7 ; %1, %2: word residuals (modified); %3: temp; %4: high-byte source for unpacking (presumably a zeroed register - confirm); %5, %6: pixel operands; the optional %7 is not referenced in the body
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
385 movh %3, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
386 punpcklbw %3, %4 ; widen pixels from %5 to words
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
387 psraw %1, 6 ; arithmetic shift of the residual right by 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
388 paddsw %1, %3 ; %1 = (%1>>6) + pixels, signed saturating
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
389 movh %3, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
390 punpcklbw %3, %4 ; widen pixels from %6 to words
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
391 psraw %2, 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
392 paddsw %2, %3 ; %2 = (%2>>6) + pixels, signed saturating
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
393 packuswb %2, %1 ; clamp to unsigned bytes: low half from %2, high half from %1; nothing is stored by this macro
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
394 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
395
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
396 %macro STORE_DIFF 4 ; %1: word residual (clobbered); %2: temp; %3: high-byte source for unpacking (presumably a zeroed register - confirm); %4: pixel memory operand, read then overwritten
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
397 movh %2, %4 ; load the current pixels
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
398 punpcklbw %2, %3 ; widen pixels to words
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
399 psraw %1, 6 ; arithmetic shift of the residual right by 6
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
400 paddsw %1, %2 ; %1 = (%1>>6) + pixels, signed saturating
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
401 packuswb %1, %1 ; clamp words to unsigned bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
402 movh %4, %1 ; write the reconstructed pixels back to the same location
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
403 %endmacro
12013
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
404
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
405 %macro STORE_DIFFx2 8 ; add1, add2, reg1, reg2, zero, shift, source, stride
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
406 movh %3, [%7] ; pixels of the first row
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
407 movh %4, [%7+%8] ; pixels of the row one stride further
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
408 punpcklbw %3, %5 ; widen both rows to words using the zero register
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
409 punpcklbw %4, %5
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
410 psraw %1, %6 ; arithmetic shift of both residuals by the caller-supplied amount (add1/add2 are modified)
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
411 psraw %2, %6
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
412 paddw %3, %1 ; wrapping add here (DIFFx2/STORE_DIFF use saturating paddsw instead)
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
413 paddw %4, %2
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
414 packuswb %3, %5 ; clamp to unsigned bytes; the high half comes from the zero register
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
415 packuswb %4, %5
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
416 movh [%7], %3 ; write both reconstructed rows back in place
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
417 movh [%7+%8], %4
2ae70e2c31a4 MMX idct_add for VP8.
rbultje
parents: 12005
diff changeset
418 %endmacro