annotate x86/x86util.asm @ 11980:263b4ef7ad87 libavcodec

tablegen: implement and use WRITE_ARRAY macros. Two macros (WRITE_ARRAY and WRITE_ARRAY_2D) take the prefix (the modifiers: not all tables are static, and they might not be constant either), the type, and the name of the array. The array is copied with the same name and type, and with the correct size of the currently-defined object.
author flameeyes
date Sun, 27 Jun 2010 12:21:12 +0000
parents 980030a3e315
children 88563eada57f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
1 ;*****************************************************************************
8804
ba83a0c57e9f Fix wrong file name in header, noticed by David DeHaven, dave sagetv com.
diego
parents: 8510
diff changeset
2 ;* x86util.asm
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
3 ;*****************************************************************************
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
4 ;* Copyright (C) 2008 x264 project
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
5 ;*
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
6 ;* Authors: Holger Lubitz <holger@lubitz.org>
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
7 ;* Loren Merritt <lorenm@u.washington.edu>
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
8 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
9 ;* This program is free software; you can redistribute it and/or modify
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
10 ;* it under the terms of the GNU General Public License as published by
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
11 ;* the Free Software Foundation; either version 2 of the License, or
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
12 ;* (at your option) any later version.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
13 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
14 ;* This program is distributed in the hope that it will be useful,
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
17 ;* GNU General Public License for more details.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
18 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
19 ;* You should have received a copy of the GNU General Public License
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
20 ;* along with this program; if not, write to the Free Software
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
22 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
23
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
24 %assign FENC_STRIDE 16
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
25 %assign FDEC_STRIDE 32
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
26
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
27 %macro SBUTTERFLY 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
28 mova m%4, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
29 punpckl%1 m%2, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
30 punpckh%1 m%4, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
31 SWAP %3, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
32 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
33
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
34 %macro SBUTTERFLY2 4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
35 mova m%4, m%2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
36 punpckh%1 m%2, m%3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
37 punpckl%1 m%4, m%3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
38 SWAP %2, %4, %3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
39 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
40
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
41 %macro TRANSPOSE4x4W 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
42 SBUTTERFLY wd, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
43 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
44 SBUTTERFLY dq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
45 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
46 SWAP %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
47 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
48
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
49 %macro TRANSPOSE2x4x4W 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
50 SBUTTERFLY wd, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
51 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
52 SBUTTERFLY dq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
53 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
54 SBUTTERFLY qdq, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
55 SBUTTERFLY qdq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
56 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
57
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
58 %macro TRANSPOSE4x4D 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
59 SBUTTERFLY dq, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
60 SBUTTERFLY dq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
61 SBUTTERFLY qdq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
62 SBUTTERFLY qdq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
63 SWAP %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
64 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
65
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
66 %macro TRANSPOSE8x8W 9-11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
67 %ifdef ARCH_X86_64
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
68 SBUTTERFLY wd, %1, %2, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
69 SBUTTERFLY wd, %3, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
70 SBUTTERFLY wd, %5, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
71 SBUTTERFLY wd, %7, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
72 SBUTTERFLY dq, %1, %3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
73 SBUTTERFLY dq, %2, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
74 SBUTTERFLY dq, %5, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
75 SBUTTERFLY dq, %6, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
76 SBUTTERFLY qdq, %1, %5, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
77 SBUTTERFLY qdq, %2, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
78 SBUTTERFLY qdq, %3, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
79 SBUTTERFLY qdq, %4, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
80 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
81 SWAP %4, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
82 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
83 ; in: m0..m7, unless %11 is given, in which case m6 is in %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
84 ; out: m0..m7, unless %11 is given, in which case m4 is in %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
85 ; spills into %9 and %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
86 %if %0<11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
87 movdqa %9, m%7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
88 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
89 SBUTTERFLY wd, %1, %2, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
90 movdqa %10, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
91 movdqa m%7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
92 SBUTTERFLY wd, %3, %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
93 SBUTTERFLY wd, %5, %6, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
94 SBUTTERFLY wd, %7, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
95 SBUTTERFLY dq, %1, %3, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
96 movdqa %9, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
97 movdqa m%2, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
98 SBUTTERFLY dq, %2, %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
99 SBUTTERFLY dq, %5, %7, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
100 SBUTTERFLY dq, %6, %8, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
101 SBUTTERFLY qdq, %1, %5, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
102 SBUTTERFLY qdq, %2, %6, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
103 movdqa %10, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
104 movdqa m%3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
105 SBUTTERFLY qdq, %3, %7, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
106 SBUTTERFLY qdq, %4, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
107 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
108 SWAP %4, %7
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
109 %if %0<11
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
110 movdqa m%5, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
111 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
112 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
113 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
114
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
115 %macro ABS1_MMX 2 ; a, tmp
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
116 pxor %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
117 psubw %2, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
118 pmaxsw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
119 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
120
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
121 %macro ABS2_MMX 4 ; a, b, tmp0, tmp1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
122 pxor %3, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
123 pxor %4, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
124 psubw %3, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
125 psubw %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
126 pmaxsw %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
127 pmaxsw %2, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
128 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
129
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
130 %macro ABS1_SSSE3 2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
131 pabsw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
132 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
133
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
134 %macro ABS2_SSSE3 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
135 pabsw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
136 pabsw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
137 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
138
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
139 %macro ABSB_MMX 2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
140 pxor %2, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
141 psubb %2, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
142 pminub %1, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
143 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
144
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
145 %macro ABSB2_MMX 4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
146 pxor %3, %3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
147 pxor %4, %4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
148 psubb %3, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
149 psubb %4, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
150 pminub %1, %3
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
151 pminub %2, %4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
152 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
153
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
154 %macro ABSB_SSSE3 2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
155 pabsb %1, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
156 %endmacro
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
157
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
158 %macro ABSB2_SSSE3 4
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
159 pabsb %1, %1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
160 pabsb %2, %2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
161 %endmacro
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
162
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
163 %macro ABS4 6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
164 ABS2 %1, %2, %5, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
165 ABS2 %3, %4, %5, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
166 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
167
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
168 %define ABS1 ABS1_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
169 %define ABS2 ABS2_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
170 %define ABSB ABSB_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
171 %define ABSB2 ABSB2_MMX
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
172
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
173 %macro SPLATB_MMX 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
174 movd %1, [%2-3] ;to avoid crossing a cacheline
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
175 punpcklbw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
176 %if mmsize==16
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
177 pshuflw %1, %1, 0xff
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
178 punpcklqdq %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
179 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
180 pshufw %1, %1, 0xff
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
181 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
182 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
183
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
184 %macro SPLATB_SSSE3 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
185 movd %1, [%2-3]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
186 pshufb %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
187 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
188
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
189 %macro PALIGNR_MMX 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
190 %ifnidn %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
191 mova %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
192 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
193 %if mmsize == 8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
194 psllq %1, (8-%3)*8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
195 psrlq %4, %3*8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
196 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
197 pslldq %1, 16-%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
198 psrldq %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
199 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
200 por %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
201 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
202
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
203 %macro PALIGNR_SSSE3 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
204 palignr %1, %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
205 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
206
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
207 %macro DEINTB 5 ; mask, reg1, mask, reg2, src to fill masks from (register number, or any other mova source operand)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
208 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
209 mova m%1, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
210 mova m%3, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
211 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
212 mova m%1, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
213 mova m%3, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
214 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
215 pand m%1, m%2 ; dst .. y6 .. y4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
216 pand m%3, m%4 ; src .. y6 .. y4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
217 psrlw m%2, 8 ; dst .. y7 .. y5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
218 psrlw m%4, 8 ; src .. y7 .. y5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
219 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
220
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
221 %macro SUMSUB_BA 2-3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
222 %if %0==2
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
223 paddw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
224 paddw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
225 psubw %2, %1
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
226 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
227 mova %3, %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
228 paddw %1, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
229 psubw %2, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
230 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
231 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
232
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
233 %macro SUMSUB_BADC 4-5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
234 %if %0==5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
235 SUMSUB_BA %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
236 SUMSUB_BA %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
237 %else
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
238 paddw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
239 paddw %3, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
240 paddw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
241 paddw %4, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
242 psubw %2, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
243 psubw %4, %3
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
244 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
245 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
246
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
247 %macro HADAMARD4_V 4+
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
248 SUMSUB_BADC %1, %2, %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
249 SUMSUB_BADC %1, %3, %2, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
250 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
251
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
252 %macro HADAMARD8_V 8+
; Three stages of SUMSUB_BADC over eight operands. Each stage pairs operands
; that are 1, then 2, then 4 positions apart.
; Operands are forwarded verbatim; "8+" makes the 8th parameter greedy.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
253 SUMSUB_BADC %1, %2, %3, %4 ; stage 1 (distance 1), first half
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
254 SUMSUB_BADC %5, %6, %7, %8 ; stage 1 (distance 1), second half
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
255 SUMSUB_BADC %1, %3, %2, %4 ; stage 2 (distance 2), first half
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
256 SUMSUB_BADC %5, %7, %6, %8 ; stage 2 (distance 2), second half
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
257 SUMSUB_BADC %1, %5, %2, %6 ; stage 3 (distance 4)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
258 SUMSUB_BADC %3, %7, %4, %8 ; stage 3 (distance 4)
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
259 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
260
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
261 %macro TRANS_SSE2 5-6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
262 ; TRANSPOSE2x2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
263 ; %1: transpose width (d/q) - use SBUTTERFLY qdq for dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
264 ; %2: ord/unord (for compat with sse4, unused)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
265 ; %3/%4: source regs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
266 ; %5/%6: tmp regs
; Registers are given as bare indices; the macro prepends "m".
; mask_10 / mask_1100 are constants defined outside this macro.
; NOTE: there is no %else below, so a %1 other than d or q leaves mask/shift
; at whatever they were last defined as.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
267 %ifidn %1, d
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
268 %define mask [mask_10]
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
269 %define shift 16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
270 %elifidn %1, q
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
271 %define mask [mask_1100]
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
272 %define shift 32
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
273 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
274 %if %0==6 ; less dependency if we have two tmp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
275 mova m%5, mask ; ff00
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
276 mova m%6, m%4 ; x5x4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
277 psll%1 m%4, shift ; x4..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
278 pand m%6, m%5 ; x5..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
279 pandn m%5, m%3 ; ..x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
280 psrl%1 m%3, shift ; ..x1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
281 por m%4, m%5 ; x4x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
282 por m%3, m%6 ; x5x1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
283 %else ; more dependency, one insn less. sometimes faster, sometimes not
; Single-tmp variant: the mask is used as a memory operand and the halves are
; exchanged with an xor trick instead of a second temporary.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
284 mova m%5, m%4 ; x5x4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
285 psll%1 m%4, shift ; x4..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
286 pxor m%4, m%3 ; (x4^x1)x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
287 pand m%4, mask ; (x4^x1)..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
288 pxor m%3, m%4 ; x4x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
289 psrl%1 m%4, shift ; ..(x1^x4)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
290 pxor m%5, m%4 ; x5x1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
291 SWAP %4, %3, %5
; Results are in m%3 and m%5 at this point and m%4 is scratch. SWAP is defined
; elsewhere - presumably it renames registers so the outputs appear under
; %3/%4; confirm.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
292 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
293 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
294
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
295 %macro TRANS_SSE4 5-6 ; see above
; Same parameter list as TRANS_SSE2. Here %2 (ord/unord) is honoured in the
; "d" path, and %6 is accepted but never referenced.
; A %1 other than d or q emits nothing.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
296 %ifidn %1, d
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
297 mova m%5, m%3 ; keep a copy of the first source
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
298 %ifidn %2, ord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
299 psrl%1 m%3, 16 ; ordered only: move odd words down to even positions
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
300 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
301 pblendw m%3, m%4, 10101010b ; odd words from m%4, even words kept from m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
302 psll%1 m%4, 16 ; move even words of m%4 up to odd positions
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
303 %ifidn %2, ord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
304 pblendw m%4, m%5, 01010101b ; even words from the saved copy of m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
305 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
306 psrl%1 m%5, 16 ; unordered: odd words of the saved copy moved down ...
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
307 por m%4, m%5 ; ... and merged into the zeroed even positions
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
308 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
309 %elifidn %1, q
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
310 mova m%5, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
311 shufps m%3, m%4, 10001000b ; dwords 0,2 of m%3 then dwords 0,2 of m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
312 shufps m%5, m%4, 11011101b ; dwords 1,3 of old m%3 then dwords 1,3 of m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
313 SWAP %4, %5
; The second result was built in m%5. SWAP is defined elsewhere - presumably
; it renames registers so that result appears under %4; confirm.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
314 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
315 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
316
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
317 %macro HADAMARD 5-6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
318 ; %1=distance in words (0 for vertical pass, 1/2/4 for horizontal passes)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
319 ; %2=sumsub/max/amax (sum and diff / maximum / maximum of absolutes)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
320 ; %3/%4: regs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
321 ; %5(%6): tmpregs
; Registers are given as bare indices; "m" is prepended where instructions are
; emitted directly.
; Step 1 (only when %1 != 0): reorder the inputs with TRANS or SBUTTERFLY.
; Step 2: combine m%3 and m%4 according to %2.
; TRANS is not defined in this macro; presumably the including file binds it
; to TRANS_SSE2 or TRANS_SSE4 - confirm.
; A distance other than 0, 1, 2 or 4 emits no reordering at all.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
322 %if %1!=0 ; have to reorder stuff for horizontal op
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
323 %ifidn %2, sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
324 %define ORDER ord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
325 ; sumsub needs order because a-b != b-a unless a=b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
326 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
327 %define ORDER unord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
328 ; if we just max, order doesn't matter (allows pblendw+or in sse4)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
329 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
330 %if %1==1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
331 TRANS d, ORDER, %3, %4, %5, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
332 %elif %1==2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
333 %if mmsize==8
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
334 SBUTTERFLY dq, %3, %4, %5 ; mmsize (defined elsewhere) is 8: SBUTTERFLY instead of TRANS q
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
335 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
336 TRANS q, ORDER, %3, %4, %5, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
337 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
338 %elif %1==4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
339 SBUTTERFLY qdq, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
340 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
341 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
342 %ifidn %2, sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
343 SUMSUB_BA m%3, m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
344 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
345 %ifidn %2, amax
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
346 %if %0==6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
347 ABS2 m%3, m%4, m%5, m%6 ; two tmp regs available: one ABS2 call covers both inputs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
348 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
349 ABS1 m%3, m%5 ; single tmp reg: two ABS1 calls sharing m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
350 ABS1 m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
351 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
352 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
353 pmaxsw m%3, m%4 ; signed word maximum into m%3; reached for every %2 except sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
354 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
355 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
356
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
357
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
358 %macro HADAMARD2_2D 6-7 sumsub
; %1-%4: register indices, processed as the pairs (%1,%2) and (%3,%4)
; %5:    tmp register index
; %6:    SBUTTERFLY size suffix (callers in this file pass wd, dq or qdq)
; %7:    operation for the second pass, default sumsub. If %7 is a number it
;        is instead used as a second tmp register index and the operation
;        becomes amax.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
359 HADAMARD 0, sumsub, %1, %2, %5 ; first pass, always sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
360 HADAMARD 0, sumsub, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
361 SBUTTERFLY %6, %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
362 %ifnum %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
363 HADAMARD 0, amax, %1, %2, %5, %7 ; %7 is a second tmp register here
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
364 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
365 HADAMARD 0, %7, %1, %2, %5 ; %7 is the operation name here
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
366 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
367 SBUTTERFLY %6, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
368 %ifnum %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
369 HADAMARD 0, amax, %3, %4, %5, %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
370 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
371 HADAMARD 0, %7, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
372 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
373 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
374
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
375 %macro HADAMARD4_2D 5-6 sumsub
; %1-%4: register indices, %5: tmp register index,
; %6: forwarded as %7 of the second HADAMARD2_2D call (default sumsub).
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
376 HADAMARD2_2D %1, %2, %3, %4, %5, wd ; word-level stage, default sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
377 HADAMARD2_2D %1, %3, %2, %4, %5, dq, %6 ; dword-level stage on the re-paired registers
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
378 SWAP %2, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
379 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
380
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
381 %macro HADAMARD4_2D_SSE 5-6 sumsub
; %1-%4: register indices, %5: tmp register index,
; %6: operation for the final HADAMARD passes (default sumsub).
; Unlike HADAMARD4_2D, this variant adds a qdq SBUTTERFLY stage before the
; final passes.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
382 HADAMARD 0, sumsub, %1, %2, %5 ; 1st V row 0 + 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
383 HADAMARD 0, sumsub, %3, %4, %5 ; 1st V row 2 + 3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
384 SBUTTERFLY wd, %1, %2, %5 ; %1: m0 1+0 %2: m1 1+0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
385 SBUTTERFLY wd, %3, %4, %5 ; %3: m0 3+2 %4: m1 3+2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
386 HADAMARD2_2D %1, %3, %2, %4, %5, dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
387 SBUTTERFLY qdq, %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
388 HADAMARD 0, %6, %1, %2, %5 ; 2nd H m1/m0 row 0+1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
389 SBUTTERFLY qdq, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
390 HADAMARD 0, %6, %3, %4, %5 ; 2nd H m1/m0 row 2+3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
391 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
392
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
393 %macro HADAMARD8_2D 9-10 sumsub
; %1-%8: register indices, %9: tmp register index,
; %10: forwarded as %7 of the last two HADAMARD2_2D calls (default sumsub).
; Three stages of HADAMARD2_2D with SBUTTERFLY sizes wd, dq and qdq; only the
; last stage receives %10.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
394 HADAMARD2_2D %1, %2, %3, %4, %9, wd
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
395 HADAMARD2_2D %5, %6, %7, %8, %9, wd
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
396 HADAMARD2_2D %1, %3, %2, %4, %9, dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
397 HADAMARD2_2D %5, %7, %6, %8, %9, dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
398 HADAMARD2_2D %1, %5, %3, %7, %9, qdq, %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
399 HADAMARD2_2D %2, %6, %4, %8, %9, qdq, %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
400 %ifnidn %10, amax
; The SWAPs below are skipped only when %10 is literally "amax" - presumably
; because the register order does not matter to callers in that mode; confirm.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
401 SWAP %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
402 SWAP %4, %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
403 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
404 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
405
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
406 %macro SUMSUB2_AB 3
; With a = %1 and b = %2 on entry (packed 16-bit words):
;   %1 <- 2*a + b
;   %3 <- a - 2*b
; %2 is only read. Operands are used verbatim (no "m" prefix is added), so
; callers pass full operands; DCT4_1D in this file passes a memory operand
; as %2.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
407 mova %3, %1 ; %3 = a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
408 paddw %1, %1 ; %1 = 2a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
409 paddw %1, %2 ; %1 = 2a + b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
410 psubw %3, %2 ; %3 = a - b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
411 psubw %3, %2 ; %3 = a - 2b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
412 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
413
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
414 %macro SUMSUB2_BA 3
; With a = m%1 and b = m%2 on entry (packed 16-bit words):
;   m%1 <- a + 2*b
;   m%2 <- b - 2*a
;   m%3 <- a (tmp, overwritten)
; Unlike SUMSUB2_AB, operands are bare register indices; "m" is prepended here.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
415 mova m%3, m%1 ; m%3 = a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
416 paddw m%1, m%2 ; m%1 = a + b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
417 paddw m%1, m%2 ; m%1 = a + 2b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
418 psubw m%2, m%3 ; m%2 = b - a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
419 psubw m%2, m%3 ; m%2 = b - 2a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
420 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
421
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
422 %macro SUMSUBD2_AB 4
; With a = %1 and b = %2 on entry (packed 16-bit words):
;   %1 <- (a>>1) - b
;   %2 <- (b>>1) + a
; %3 and %4 receive copies of b and a and are overwritten; they are written
; with mova, so they may be memory operands (IDCT4_1D in this file passes
; [%5] and [%5+16]). Operands are used verbatim (no "m" prefix is added).
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
423 mova %4, %1 ; %4 = a (saved before the shift)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
424 mova %3, %2 ; %3 = b (saved before the shift)
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
425 psraw %2, 1 ; %2: %2>>1 (arithmetic shift)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
426 psraw %1, 1 ; %1: %1>>1 (arithmetic shift)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
427 paddw %2, %4 ; %2: (%2>>1)+%1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
428 psubw %1, %3 ; %1: (%1>>1)-%2
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
429 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
430
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
431 %macro DCT4_1D 5
; %1-%4: register indices of the four inputs
; %5:    a register index (used as tmp) or, if not a number, an address
;        expression used as a one-register spill slot
; Both paths end with a SWAP over the register indices; SWAP is defined
; elsewhere - presumably it renames registers so the outputs land in %1-%4 in
; order; confirm.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
432 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
433 SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
; NOTE: the fifth operand above is commented out, so SUMSUB_BADC is invoked
; with four operands even though a tmp register is available.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
434 SUMSUB_BA m%3, m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
435 SUMSUB2_AB m%1, m%2, m%5 ; m%1 = 2*m%1 + m%2, m%5 = m%1 - 2*m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
436 SWAP %1, %3, %4, %5, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
437 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
438 SUMSUB_BADC m%4, m%1, m%3, m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
439 SUMSUB_BA m%3, m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
440 mova [%5], m%2 ; spill m%2 so the register can be reused as the output below
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
441 SUMSUB2_AB m%1, [%5], m%2 ; second source read from the spill slot; m%2 receives the difference
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
442 SWAP %1, %3, %4, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
443 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
444 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
445
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
446 %macro IDCT4_1D 5-6
; %1-%4: register indices of the four inputs
; %5:    if a number, a tmp register index, and %6 must then also be given
;        (m%6 is referenced); otherwise an address expression whose slots
;        [%5] and [%5+16] are used as scratch
; The comments below use the input names, with >> binding tighter than +/-.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
447 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
448 SUMSUBD2_AB m%2, m%4, m%6, m%5
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
449 ; %2: (%2>>1)-%4 %4: %2+(%4>>1)
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
450 SUMSUB_BA m%3, m%1, m%6
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
451 ; %3: %1+%3 %1: %1-%3
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
452 SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
453 ; %4: %1+%3 + (%2+%4>>1)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
454 ; %3: %1+%3 - (%2+%4>>1)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
455 ; %2: %1-%3 + (%2>>1-%4)
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
456 ; %1: %1-%3 - (%2>>1-%4)
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
457 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
458 SUMSUBD2_AB m%2, m%4, [%5], [%5+16] ; same steps as above, with memory scratch instead of tmp registers
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
459 SUMSUB_BA m%3, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
460 SUMSUB_BADC m%4, m%3, m%2, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
461 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
462 SWAP %1, %4, %3
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
463 ; %1: %1+%3 + (%2+%4>>1) row0
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
464 ; %2: %1-%3 + (%2>>1-%4) row1
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
465 ; %3: %1-%3 - (%2>>1-%4) row2
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
466 ; %4: %1+%3 - (%2+%4>>1) row3
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
467 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
468
11931
980030a3e315 Update x264asm header files to latest versions.
darkshikari
parents: 10019
diff changeset
469
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
470 %macro LOAD_DIFF 5
; Loads bytes from %4 and %5 and leaves their per-element difference
; (%4 minus %5) in %1 as 16-bit words.
; %1: destination register, %2: tmp register (overwritten)
; %3: register interleaved with the loaded bytes, or the literal "none"
; %4/%5: source operands
; Operands are used verbatim (no "m" prefix is added). movh is defined
; elsewhere - presumably a half-register load; confirm.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
471 %ifidn %3, none
; No third register: interleave with the second source instead. Each word of
; %1 becomes a + 256*b and each word of %2 becomes b + 256*b, so the
; subtraction still yields a - b.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
472 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
473 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
474 punpcklbw %1, %2 ; words: a | b<<8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
475 punpcklbw %2, %2 ; words: b | b<<8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
476 psubw %1, %2 ; high bytes cancel, leaving a - b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
477 %else
; Presumably %3 holds zero so that both sources are zero-extended to words
; before the subtraction - confirm against callers.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
478 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
479 punpcklbw %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
480 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
481 punpcklbw %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
482 psubw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
483 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
484 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
485
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; LOAD_DIFF8x4_SSE2: run LOAD_DIFF once for each of four rows.
;   %1-%4: register NUMBERS of the four destinations (used as m%N). The same
;          number is also the row index multiplied by FENC_STRIDE and
;          FDEC_STRIDE, so the register number selects which row is loaded.
;   %5:    register number of the scratch register handed to LOAD_DIFF
;   %6:    register number handed to LOAD_DIFF as its third argument
;   %7:    base address of the rows addressed with FENC_STRIDE (minuend)
;   %8:    base address of the rows addressed with FDEC_STRIDE (subtrahend)
486 %macro LOAD_DIFF8x4_SSE2 8
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
487 LOAD_DIFF m%1, m%5, m%6, [%7+%1*FENC_STRIDE], [%8+%1*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
488 LOAD_DIFF m%2, m%5, m%6, [%7+%2*FENC_STRIDE], [%8+%2*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
489 LOAD_DIFF m%3, m%5, m%6, [%7+%3*FENC_STRIDE], [%8+%3*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
490 LOAD_DIFF m%4, m%5, m%6, [%7+%4*FENC_STRIDE], [%8+%4*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
491 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
492
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; LOAD_DIFF8x4_SSSE3: load four row pairs, interleave their bytes and reduce
; every byte pair to one word with pmaddubsw.
;   %1-%4: register numbers of the destinations; as in the SSE2 variant the
;          number is also the row index scaled by FENC_STRIDE / FDEC_STRIDE
;   %5:    register number of a scratch register (only needed for row %4)
;   %6:    register number holding the signed byte multipliers
;   %7:    base address of the rows addressed with FENC_STRIDE
;   %8:    base address of the rows addressed with FDEC_STRIDE
; Each row borrows the NEXT destination register as scratch for its
; FDEC_STRIDE load before that register receives its own row, so the
; statement order below must not be changed.
; NOTE(review): m%6 presumably holds alternating +1/-1 bytes so that each
; word ends up as (FENC byte - FDEC byte) - confirm at call sites.
493 %macro LOAD_DIFF8x4_SSSE3 8 ; 4x dst, 1x tmp, 1x mul, 2x ptr
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
494 movh m%2, [%8+%1*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
495 movh m%1, [%7+%1*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
496 punpcklbw m%1, m%2 ; low byte from %7 row, high byte from %8 row
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
497 movh m%3, [%8+%2*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
498 movh m%2, [%7+%2*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
499 punpcklbw m%2, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
500 movh m%4, [%8+%3*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
501 movh m%3, [%7+%3*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
502 punpcklbw m%3, m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
503 movh m%5, [%8+%4*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
504 movh m%4, [%7+%4*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
505 punpcklbw m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; pmaddubsw: unsigned bytes of the destination times signed bytes of m%6,
; adjacent products summed into signed words (with saturation).
506 pmaddubsw m%1, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
507 pmaddubsw m%2, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
508 pmaddubsw m%3, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
509 pmaddubsw m%4, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
510 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
511
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; STORE_DCT: write four registers to 64 consecutive bytes at [%5+%6].
;   %1-%4: register numbers of the sources (used as m%N)
;   %5,%6: the two address components summed to form the store base
; The low quadwords of m%1..m%4 go to offsets 0..24 and their high quadwords
; to offsets 32..56; the macro takes six arguments and uses all of them.
512 %macro STORE_DCT 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; Low 8 bytes of each register.
513 movq [%5+%6+ 0], m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
514 movq [%5+%6+ 8], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
515 movq [%5+%6+16], m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
516 movq [%5+%6+24], m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; High 8 bytes of each register.
517 movhps [%5+%6+32], m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
518 movhps [%5+%6+40], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
519 movhps [%5+%6+48], m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
520 movhps [%5+%6+56], m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
521 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
522
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; STORE_IDCT: store four registers as eight rows around the pointer in r0.
;   %1-%4: full register names (no m prefix is added here)
; Each register supplies two rows: its high half (movhps) goes to the
; earlier row and its low half (movh) to the following one. Rows span
; r0-4*FDEC_STRIDE .. r0+3*FDEC_STRIDE; r0 is hard-coded and not modified.
523 %macro STORE_IDCT 4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
524 movhps [r0-4*FDEC_STRIDE], %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
525 movh [r0-3*FDEC_STRIDE], %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
526 movhps [r0-2*FDEC_STRIDE], %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
527 movh [r0-1*FDEC_STRIDE], %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
528 movhps [r0+0*FDEC_STRIDE], %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
529 movh [r0+1*FDEC_STRIDE], %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
530 movhps [r0+2*FDEC_STRIDE], %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
531 movh [r0+3*FDEC_STRIDE], %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
532 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
533
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; LOAD_DIFF_8x4P: run LOAD_DIFF on four consecutive rows of two buffers.
;   %1-%4: register numbers of the four destinations
;   %5,%6: register numbers of two scratch registers, used alternately
;   %7:    register number passed to LOAD_DIFF as its third argument
;   %8:    first pointer register  (default r0), rows stepped by r1
;   %9:    second pointer register (default r2), rows stepped by r3
;   %10:   when non-zero, advance %8 and %9 by four rows afterwards
;          (default 0)
; The fourth row is addressed with r4 and r5.
; NOTE(review): r4/r5 presumably hold 3*r1 and 3*r3 - confirm at call sites.
534 %macro LOAD_DIFF_8x4P 7-10 r0,r2,0 ; 4x dest, 2x temp, 2x pointer, increment?
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
535 LOAD_DIFF m%1, m%5, m%7, [%8], [%9]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
536 LOAD_DIFF m%2, m%6, m%7, [%8+r1], [%9+r3]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
537 LOAD_DIFF m%3, m%5, m%7, [%8+2*r1], [%9+2*r3]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
538 LOAD_DIFF m%4, m%6, m%7, [%8+r4], [%9+r5]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
539 %if %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; Optional post-increment: step both pointers past the four rows just read.
540 lea %8, [%8+4*r1]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
541 lea %9, [%9+4*r3]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
542 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
543 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
544
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; DIFFx2: scale two word registers down by 6 bits, add the bytes read from
; two memory operands, and pack both results into %2.
;   %1,%2: word registers; each is shifted right arithmetically by 6 in place
;   %3:    scratch register (overwritten)
;   %4:    register interleaved as the high byte of every loaded byte
;          NOTE(review): presumably zero so the bytes are zero-extended
;          before the add - confirm at call sites
;   %5:    memory operand whose bytes are added to %1
;   %6:    memory operand whose bytes are added to %2
;   %7:    optional; accepted by the signature but never referenced below
; Result: %2 holds the unsigned-saturated bytes of the %2 sums in its low
; half and those of the %1 sums in its high half. Nothing is written back to
; memory by this macro.
545 %macro DIFFx2 6-7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
546 movh %3, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
547 punpcklbw %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
548 psraw %1, 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
549 paddsw %1, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
550 movh %3, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
551 punpcklbw %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
552 psraw %2, 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
553 paddsw %2, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
554 packuswb %2, %1 ; low half <- %2 sums, high half <- %1 sums
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
555 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
556
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; STORE_DIFF: add a scaled word register to the bytes at a memory operand
; and write the clamped bytes back to that same operand.
;   %1: word register; shifted right arithmetically by 6, then overwritten
;       with the packed result
;   %2: scratch register (overwritten)
;   %3: register interleaved as the high byte of every loaded byte
;       NOTE(review): presumably zero so the bytes are zero-extended before
;       the add - confirm at call sites
;   %4: memory operand, read first and stored to last
557 %macro STORE_DIFF 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
558 movh %2, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
559 punpcklbw %2, %3
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
560 psraw %1, 6
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
561 paddsw %1, %2 ; signed saturating add of the widened bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
562 packuswb %1, %1 ; clamp each word to 0..255 and narrow to bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
563 movh %4, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
564 %endmacro