annotate x86/x86util.asm @ 11352:6e0af2cfdcfe libavcodec

Do MC and IDCT in coding (Hilbert) order. This increases the slice size to 64 pixels, due to having to decode an entire chroma superblock row per slice. This can be up to 6% slower depending on clip and CPU, but is necessary for future optimizations that gain significantly more than was lost.
author conrad
date Wed, 03 Mar 2010 23:27:40 +0000
parents c08ca946c80a
children 980030a3e315
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
1 ;*****************************************************************************
8804
ba83a0c57e9f Fix wrong file name in header, noticed by David DeHaven, dave sagetv com.
diego
parents: 8510
diff changeset
2 ;* x86util.asm
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
3 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
4 ;* Copyright (C) 2008 Loren Merritt <lorenm@u.washington.edu>
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
5 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
6 ;* This program is free software; you can redistribute it and/or modify
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
7 ;* it under the terms of the GNU General Public License as published by
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
8 ;* the Free Software Foundation; either version 2 of the License, or
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
9 ;* (at your option) any later version.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
10 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
11 ;* This program is distributed in the hope that it will be useful,
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
14 ;* GNU General Public License for more details.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
15 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
16 ;* You should have received a copy of the GNU General Public License
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
17 ;* along with this program; if not, write to the Free Software
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
19 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
20
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; SBUTTERFLY suffix, a, b, tmp
;   %1    : size suffix pasted onto punpckl/punpckh (this file passes wd, dq and qdq)
;   %2/%3 : register numbers of the two inputs (expanded as m%2 / m%3)
;   %4    : register number of a scratch register (expanded as m%4)
; After the three instructions m%2 holds the low-half interleave of a and b and
; m%4 holds the high-half interleave; m%3 is left unmodified by the instructions.
; NOTE(review): mova and SWAP are not defined in the visible part of this file;
; presumably they are the x264 x86inc.asm helpers, in which case the final SWAP
; renames registers so that %3 refers to the high interleave and %4 to the old b.
21 %macro SBUTTERFLY 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
22 mova m%4, m%2 ; tmp = a (punpckl below overwrites a)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
23 punpckl%1 m%2, m%3 ; a = interleave of the low halves of a and b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
24 punpckh%1 m%4, m%3 ; tmp = interleave of the high halves of a and b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
25 SWAP %3, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
26 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
27
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; TRANSPOSE4x4W r0, r1, r2, r3, tmp
;   %1-%4 : register numbers of the four rows
;   %5    : register number used as SBUTTERFLY scratch
; Two SBUTTERFLY rounds: words interleaved into dwords on the pairs (%1,%2) and
; (%3,%4), then dwords interleaved into qwords on (%1,%3) and (%2,%4), followed
; by SWAP %2, %3.
; Going by the name, the net effect is a transpose of a 4x4 block of 16-bit
; words stored one row per register.
28 %macro TRANSPOSE4x4W 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
29 SBUTTERFLY wd, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
30 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
31 SBUTTERFLY dq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
32 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
33 SWAP %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
34 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
35
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; TRANSPOSE2x4x4W r0, r1, r2, r3, tmp
;   %1-%4 : register numbers of the four rows
;   %5    : register number used as SBUTTERFLY scratch
; Same wd and dq rounds as TRANSPOSE4x4W, then a third qdq round on the pairs
; (%1,%2) and (%3,%4). Unlike TRANSPOSE4x4W there is no trailing SWAP.
; Going by the name, this transposes two 4x4 word blocks that sit side by side
; in the registers.
; NOTE(review): the qdq stage expands to punpcklqdq/punpckhqdq, which only exist
; for 128-bit registers -- confirm callers only use this with xmm registers.
36 %macro TRANSPOSE2x4x4W 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
37 SBUTTERFLY wd, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
38 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
39 SBUTTERFLY dq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
40 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
41 SBUTTERFLY qdq, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
42 SBUTTERFLY qdq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
43 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
44
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; TRANSPOSE4x4D r0, r1, r2, r3, tmp
;   %1-%4 : register numbers of the four rows
;   %5    : register number used as SBUTTERFLY scratch
; Dword counterpart of TRANSPOSE4x4W: a dq round on (%1,%2) and (%3,%4), a qdq
; round on (%1,%3) and (%2,%4), then SWAP %2, %3.
; Going by the name, this transposes a 4x4 block of 32-bit dwords.
45 %macro TRANSPOSE4x4D 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
46 SBUTTERFLY dq, %1, %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
47 SBUTTERFLY dq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
48 SBUTTERFLY qdq, %1, %3, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
49 SBUTTERFLY qdq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
50 SWAP %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
51 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
52
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; TRANSPOSE8x8W r0..r7 (%1-%8), %9 [, %10 [, %11]]
; Three SBUTTERFLY rounds (wd, dq, qdq) over eight registers, finished by
; SWAP %2, %5 and SWAP %4, %7. Going by the name, this transposes an 8x8 block
; of 16-bit words stored one row per register.
;
; ARCH_X86_64 defined:
;   %9 is handed to SBUTTERFLY as the scratch register number; %10 and %11 are
;   not referenced in this branch.
; otherwise:
;   %9 and %10 are used directly as movdqa operands (spill slots, see the
;   original comments below), and whichever of %1-%8 has just been spilled is
;   passed to SBUTTERFLY as scratch.
;   %11 is never expanded; only the argument count matters. With fewer than 11
;   arguments the macro first stores m%7 to %9 and, at the end, reloads m%5
;   from %10. With 11 arguments both moves are skipped, so the caller supplies
;   that input in %9 and picks that output up from %10.
53 %macro TRANSPOSE8x8W 9-11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
54 %ifdef ARCH_X86_64
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
55 SBUTTERFLY wd, %1, %2, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
56 SBUTTERFLY wd, %3, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
57 SBUTTERFLY wd, %5, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
58 SBUTTERFLY wd, %7, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
59 SBUTTERFLY dq, %1, %3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
60 SBUTTERFLY dq, %2, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
61 SBUTTERFLY dq, %5, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
62 SBUTTERFLY dq, %6, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
63 SBUTTERFLY qdq, %1, %5, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
64 SBUTTERFLY qdq, %2, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
65 SBUTTERFLY qdq, %3, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
66 SBUTTERFLY qdq, %4, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
67 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
68 SWAP %4, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
69 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
70 ; in: m0..m7, unless %11 in which case m6 is in %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
71 ; out: m0..m7, unless %11 in which case m4 is in %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
72 ; spills into %9 and %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
73 %if %0<11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
74 movdqa %9, m%7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
75 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
76 SBUTTERFLY wd, %1, %2, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
77 movdqa %10, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
78 movdqa m%7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
79 SBUTTERFLY wd, %3, %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
80 SBUTTERFLY wd, %5, %6, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
81 SBUTTERFLY wd, %7, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
82 SBUTTERFLY dq, %1, %3, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
83 movdqa %9, m%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
84 movdqa m%2, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
85 SBUTTERFLY dq, %2, %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
86 SBUTTERFLY dq, %5, %7, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
87 SBUTTERFLY dq, %6, %8, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
88 SBUTTERFLY qdq, %1, %5, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
89 SBUTTERFLY qdq, %2, %6, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
90 movdqa %10, m%2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
91 movdqa m%3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
92 SBUTTERFLY qdq, %3, %7, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
93 SBUTTERFLY qdq, %4, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
94 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
95 SWAP %4, %7
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
96 %if %0<11
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
97 movdqa m%5, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
98 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
99 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
100 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
101
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; ABS1_MMX a, tmp
; Per-word absolute value of %1 built from negate + signed max instead of pabsw.
; Clobbers %2.
102 %macro ABS1_MMX 2 ; a, tmp
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
103 pxor %2, %2 ; tmp = 0
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
104 psubw %2, %1 ; tmp = -a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
105 pmaxsw %1, %2 ; a = max(a, -a)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
106 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
107
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; ABS2_MMX a, b, tmp0, tmp1
; Same negate + signed-max sequence as ABS1_MMX, applied to two values at once
; with the two instruction streams interleaved. Clobbers %3 and %4.
108 %macro ABS2_MMX 4 ; a, b, tmp0, tmp1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
109 pxor %3, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
110 pxor %4, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
111 psubw %3, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
112 psubw %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
113 pmaxsw %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
114 pmaxsw %2, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
115 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
116
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; ABS1_SSSE3 a, tmp
; Per-word absolute value of %1 using a single pabsw.
; %2 is accepted but never referenced; presumably it is kept so the macro takes
; the same arguments as ABS1_MMX.
117 %macro ABS1_SSSE3 2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
118 pabsw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
119 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
120
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; ABS2_SSSE3 a, b, tmp0, tmp1
; Per-word absolute value of %1 and %2 using pabsw.
; %3 and %4 are accepted but never referenced; presumably they are kept so the
; macro takes the same arguments as ABS2_MMX.
121 %macro ABS2_SSSE3 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
122 pabsw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
123 pabsw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
124 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
125
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; Default the generic ABS1/ABS2 names to the MMX implementations.
; NOTE(review): presumably code that targets SSSE3 re-%defines these to the
; *_SSSE3 variants before use -- confirm against the including files.
126 %define ABS1 ABS1_MMX
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
127 %define ABS2 ABS2_MMX
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
128
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; ABS4 a, b, c, d, tmp0, tmp1
; Applies ABS2 to the pair (%1, %2) and then to the pair (%3, %4), handing the
; same two temporaries (%5, %6) to both invocations.
129 %macro ABS4 6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
130 ABS2 %1, %2, %5, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
131 ABS2 %3, %4, %5, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
132 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
133
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; SPLATB_MMX dst, addr, unused
;   %1 : destination register
;   %2 : address expression of the byte to broadcast
;   %3 : not referenced in this variant (SPLATB_SSSE3 uses it)
; Broadcasts the byte at [%2] to every byte of %1. The 4-byte load starts at
; %2-3, so the wanted byte arrives as byte 3 of the dword; punpcklbw doubles it
; into word 3, and the shuffle immediate 0xff replicates word 3.
; Note that the load also reads the three bytes preceding %2.
134 %macro SPLATB_MMX 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
135 movd %1, [%2-3] ;to avoid crossing a cacheline
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
136 punpcklbw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
137 %if mmsize==16
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
138 pshuflw %1, %1, 0xff ; word 3 -> all four words of the low qword
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
139 punpcklqdq %1, %1 ; copy the low qword into the high qword
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
140 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
141 pshufw %1, %1, 0xff ; word 3 -> all four words
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
142 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
143 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
144
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; SPLATB_SSSE3 dst, addr, shuffle
;   %1 : destination register
;   %2 : address expression of the byte to broadcast
;   %3 : pshufb control operand
; Loads 4 bytes starting at %2-3 (so the byte at [%2] is byte 3 of %1) and then
; permutes %1 with pshufb using %3 as the control.
; NOTE(review): %3 presumably holds 3 in every byte so that byte 3 is broadcast,
; matching SPLATB_MMX -- confirm with callers.
145 %macro SPLATB_SSSE3 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
146 movd %1, [%2-3]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
147 pshufb %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
148 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
149
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; PALIGNR_MMX dst, src, n, tmp
;   %1 : destination register (also the high part of the concatenation)
;   %2 : source operand (low part of the concatenation)
;   %3 : shift amount in bytes
;   %4 : scratch register
; Shift-and-or emulation of "palignr %1, %2, %3" (compare PALIGNR_SSSE3):
;   %1 = (%1 << (mmsize - %3 bytes)) | (%2 >> %3 bytes)
; %2 is first copied into %4 unless the two arguments are textually identical;
; in that case %2 itself is shifted and therefore clobbered.
; With mmsize == 8 the shifts are psllq/psrlq with bit counts; otherwise the
; byte-granular pslldq/psrldq are used.
150 %macro PALIGNR_MMX 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
151 %ifnidn %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
152 mova %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
153 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
154 %if mmsize == 8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
155 psllq %1, (8-%3)*8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
156 psrlq %4, %3*8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
157 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
158 pslldq %1, 16-%3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
159 psrldq %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
160 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
161 por %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
162 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
163
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; PALIGNR_SSSE3 dst, src, n, tmp
; Thin wrapper that emits the native palignr instruction.
; %4 is accepted but never referenced; presumably it is kept so the macro takes
; the same arguments as PALIGNR_MMX.
164 %macro PALIGNR_SSSE3 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
165 palignr %1, %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
166 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
167
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; DEINTB m0, r0, m1, r1, masksrc
;   %1/%3 : register numbers that receive the mask and then the masked data
;   %2/%4 : register numbers of the two inputs
;   %5    : where the mask comes from. If it is a number it is treated as a
;           register number (m%5) and copied into both mask registers; anything
;           else is used verbatim as the mova source operand for m%1, which is
;           then copied to m%3.
; Although the original comment calls %5 optional, the macro is declared with
; exactly 5 parameters, so it must always be supplied.
; On exit m%1 = mask & m%2 and m%3 = mask & m%4, while m%2 and m%4 have each
; 16-bit word shifted right by 8 (i.e. the high byte of every word).
; NOTE(review): the mask is presumably 0x00ff per word so that m%1/m%3 receive
; the even bytes and m%2/m%4 the odd bytes -- confirm with callers.
168 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
169 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
170 mova m%1, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
171 mova m%3, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
172 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
173 mova m%1, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
174 mova m%3, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
175 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
176 pand m%1, m%2 ; dst .. y6 .. y4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
177 pand m%3, m%4 ; src .. y6 .. y4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
178 psrlw m%2, 8 ; dst .. y7 .. y5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
179 psrlw m%4, 8 ; src .. y7 .. y5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
180 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
181
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; SUMSUB_BA a, b [, tmp]
; Packed-word sum/difference butterfly. On exit:
;   %1 = a + b
;   %2 = b - a
; Two-argument form: needs no scratch register; b - a is obtained as
;   2*b - (a + b).
; Three-argument form: saves a copy of a in %3 and subtracts that instead
;   (clobbers %3).
182 %macro SUMSUB_BA 2-3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
183 %if %0==2
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
184 paddw %1, %2 ; a = a + b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
185 paddw %2, %2 ; b = 2*b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
186 psubw %2, %1 ; b = 2*b - (a + b) = b - a
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
187 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
188 mova %3, %1 ; tmp = a
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
189 paddw %1, %2 ; a = a + b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
190 psubw %2, %3 ; b = b - tmp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
191 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
192 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
193
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; SUMSUB_BADC a, b, c, d [, tmp]
; Two independent sum/difference butterflies. On exit:
;   %1 = a + b, %2 = b - a, %3 = c + d, %4 = d - c
; Five-argument form: delegates to the three-argument SUMSUB_BA twice, sharing
;   %5 as the scratch register.
; Four-argument form: inlines the scratch-free SUMSUB_BA sequence for both
;   pairs with the two instruction streams interleaved.
194 %macro SUMSUB_BADC 4-5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
195 %if %0==5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
196 SUMSUB_BA %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
197 SUMSUB_BA %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
198 %else
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
199 paddw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
200 paddw %3, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
201 paddw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
202 paddw %4, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
203 psubw %2, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
204 psubw %4, %3
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
205 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
206 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
207
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD4_V a, b, c, d [, ...]
; Two SUMSUB_BADC stages: butterflies on (%1,%2) and (%3,%4), then on (%1,%3)
; and (%2,%4). Going by the name, together these form a 4-point Hadamard
; transform across the four operands.
; The parameter count is "4+", so %4 is greedy: any text after the fourth
; argument, commas included, is carried inside %4.
; NOTE(review): presumably this is how an optional scratch operand reaches
; SUMSUB_BADC as its fifth argument -- confirm with callers.
208 %macro HADAMARD4_V 4+
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
209 SUMSUB_BADC %1, %2, %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
210 SUMSUB_BADC %1, %3, %2, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
211 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
212
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD8_V r0, r1, r2, r3, r4, r5, r6, r7 [, ...]
; Three butterfly stages, each made of two SUMSUB_BADC invocations:
;   stage 1 pairs operands 1 apart: (%1,%2) (%3,%4) (%5,%6) (%7,%8)
;   stage 2 pairs operands 2 apart: (%1,%3) (%2,%4) (%5,%7) (%6,%8)
;   stage 3 pairs operands 4 apart: (%1,%5) (%2,%6) (%3,%7) (%4,%8)
; Going by the name, together these form an 8-point Hadamard transform across
; the eight operands.
; The parameter count is "8+", so %8 is greedy: any text after the eighth
; argument, commas included, is carried inside %8.
; NOTE(review): presumably this is how an optional scratch operand reaches
; SUMSUB_BADC; only the invocations that end in %8 would receive it -- confirm
; with callers.
213 %macro HADAMARD8_V 8+
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
214 SUMSUB_BADC %1, %2, %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
215 SUMSUB_BADC %5, %6, %7, %8
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
216 SUMSUB_BADC %1, %3, %2, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
217 SUMSUB_BADC %5, %7, %6, %8
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
218 SUMSUB_BADC %1, %5, %2, %6
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
219 SUMSUB_BADC %3, %7, %4, %8
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
220 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
221
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; TRANS_SSE2 width, ord, a, b, tmp0 [, tmp1]
; Exchanges the odd-position elements of m%3 with the even-position elements of
; m%4 using shifts and a mask (see the per-line lane comments below). The
; argument list is documented by the original comments inside the macro.
; Two code paths, selected by the argument count:
;   6 arguments: mask/merge with and/andn/or, using both temporaries.
;   5 arguments: xor-swap through the mask, using one temporary, followed by a
;                SWAP of %4, %3, %5.
; Side effect: (re)defines the single-line macros "mask" and "shift"; they
; remain defined after the macro has expanded.
; NOTE(review): when %1 is neither d nor q the %ifidn chain defines nothing, so
; "mask" and "shift" keep whatever value an earlier expansion left (or are
; undefined). mask_10, mask_1100 and GLOBAL are not defined in the visible part
; of this file.
222 %macro TRANS_SSE2 5-6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
223 ; TRANSPOSE2x2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
224 ; %1: transpose width (d/q) - use SBUTTERFLY qdq for dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
225 ; %2: ord/unord (for compat with sse4, unused)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
226 ; %3/%4: source regs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
227 ; %5/%6: tmp regs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
228 %ifidn %1, d
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
229 %define mask [mask_10 GLOBAL]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
230 %define shift 16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
231 %elifidn %1, q
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
232 %define mask [mask_1100 GLOBAL]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
233 %define shift 32
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
234 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
235 %if %0==6 ; less dependency if we have two tmp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
236 mova m%5, mask ; ff00
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
237 mova m%6, m%4 ; x5x4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
238 psll%1 m%4, shift ; x4..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
239 pand m%6, m%5 ; x5..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
240 pandn m%5, m%3 ; ..x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
241 psrl%1 m%3, shift ; ..x1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
242 por m%4, m%5 ; x4x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
243 por m%3, m%6 ; x5x1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
244 %else ; more dependency, one insn less. sometimes faster, sometimes not
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
245 mova m%5, m%4 ; x5x4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
246 psll%1 m%4, shift ; x4..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
247 pxor m%4, m%3 ; (x4^x1)x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
248 pand m%4, mask ; (x4^x1)..
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
249 pxor m%3, m%4 ; x4x0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
250 psrl%1 m%4, shift ; ..(x1^x4)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
251 pxor m%5, m%4 ; x5x1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
252 SWAP %4, %3, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
253 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
254 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
255
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; TRANS_SSE4 size, order, regA, regB, tmp [, tmp2]
;   %1 = element size selector: d or q (any other value emits no code)
;   %2 = ord / unord; only consulted when %1 is d
;   %3, %4 = register numbers of the two operands (rewritten in place)
;   %5 = scratch register number; %6 is accepted but not referenced here
; d: m%3 and m%4 are recombined with word-granular blends (pblendw) and
;    16-bit shifts. With "ord" the odd words of both inputs end up in m%3
;    and the even words in m%4; with "unord" the second blend is replaced
;    by shift+por, so the position of elements inside each pair differs.
; q: shufps gathers the even dwords of m%3/m%4 into m%3 and the odd dwords
;    into m%5; SWAP then exchanges the names %4 and %5.
; NOTE(review): mova and SWAP come from elsewhere (presumably x86inc) - confirm.
256 %macro TRANS_SSE4 5-6 ; see above
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
257 %ifidn %1, d
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
258 mova m%5, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
259 %ifidn %2, ord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
260 psrl%1 m%3, 16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
261 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
262 pblendw m%3, m%4, 10101010b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
263 psll%1 m%4, 16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
264 %ifidn %2, ord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
265 pblendw m%4, m%5, 01010101b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
266 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
267 psrl%1 m%5, 16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
268 por m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
269 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
270 %elifidn %1, q
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
271 mova m%5, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
272 shufps m%3, m%4, 10001000b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
273 shufps m%5, m%4, 11011101b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
274 SWAP %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
275 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
276 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
277
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD: one stage applied to the register pair m%3/m%4.
; Step 1 (only when %1 != 0): rearrange the pair so the final operation can
;   be done lane-wise. Distance 1 uses TRANS d, distance 2 uses TRANS q
;   (or SBUTTERFLY dq when mmsize==8), distance 4 uses SBUTTERFLY qdq.
;   Any other non-zero distance emits no rearrangement.
;   ORDER is (re)defined as ord for sumsub and unord otherwise, and is
;   forwarded to TRANS.
; Step 2: sumsub -> SUMSUB_BA m%3, m%4, m%5
;         amax   -> absolute values first (ABS2 when a 6th argument is
;                   given, otherwise ABS1 twice), then pmaxsw m%3, m%4
;         other  -> pmaxsw m%3, m%4 only
; TRANS, SBUTTERFLY, SUMSUB_BA, ABS1 and ABS2 are defined outside this excerpt.
278 %macro HADAMARD 5-6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
279 ; %1=distance in words (0 for vertical pass, 1/2/4 for horizontal passes)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
280 ; %2=sumsub/max/amax (sum and diff / maximum / maximum of absolutes)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
281 ; %3/%4: regs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
282 ; %5(%6): tmpregs
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
283 %if %1!=0 ; have to reorder stuff for horizontal op
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
284 %ifidn %2, sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
285 %define ORDER ord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
286 ; sumsub needs order because a-b != b-a unless a=b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
287 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
288 %define ORDER unord
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
289 ; if we just max, order doesn't matter (allows pblendw+or in sse4)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
290 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
291 %if %1==1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
292 TRANS d, ORDER, %3, %4, %5, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
293 %elif %1==2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
294 %if mmsize==8
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
295 SBUTTERFLY dq, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
296 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
297 TRANS q, ORDER, %3, %4, %5, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
298 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
299 %elif %1==4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
300 SBUTTERFLY qdq, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
301 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
302 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
303 %ifidn %2, sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
304 SUMSUB_BA m%3, m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
305 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
306 %ifidn %2, amax
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
307 %if %0==6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
308 ABS2 m%3, m%4, m%5, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
309 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
310 ABS1 m%3, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
311 ABS1 m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
312 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
313 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
314 pmaxsw m%3, m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
315 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
316 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
317
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
318
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD2_2D r0, r1, r2, r3, tmp, sbutterfly_size [, final_op_or_tmp2]
;   %1..%4 = register numbers, processed as the pairs (%1,%2) and (%3,%4)
;   %5     = scratch register number
;   %6     = size suffix handed to SBUTTERFLY (callers here pass wd, dq, qdq)
;   %7     = operation of the second stage; defaults to sumsub.
;            If %7 is a number it is used as a second scratch register and
;            the second stage runs HADAMARD in amax mode.
; Sequence: sumsub stage on both pairs, then for each pair an SBUTTERFLY
; followed by a second HADAMARD stage (distance 0).
319 %macro HADAMARD2_2D 6-7 sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
320 HADAMARD 0, sumsub, %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
321 HADAMARD 0, sumsub, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
322 SBUTTERFLY %6, %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
323 %ifnum %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
324 HADAMARD 0, amax, %1, %2, %5, %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
325 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
326 HADAMARD 0, %7, %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
327 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
328 SBUTTERFLY %6, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
329 %ifnum %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
330 HADAMARD 0, amax, %3, %4, %5, %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
331 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
332 HADAMARD 0, %7, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
333 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
334 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
335
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD4_2D r0, r1, r2, r3, tmp [, final_op]
;   Built from two HADAMARD2_2D passes: the first on (%1,%2),(%3,%4) with
;   wd interleaving, the second on (%1,%3),(%2,%4) with dq interleaving and
;   %6 (default sumsub) as its final operation.
;   The trailing SWAP exchanges the register names %2 and %3.
336 %macro HADAMARD4_2D 5-6 sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
337 HADAMARD2_2D %1, %2, %3, %4, %5, wd
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
338 HADAMARD2_2D %1, %3, %2, %4, %5, dq, %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
339 SWAP %2, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
340 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
341
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD4_2D_SSE r0, r1, r2, r3, tmp [, final_op]
;   Same argument layout as HADAMARD4_2D (%6 defaults to sumsub), but with
;   a different schedule: sumsub on both pairs, wd interleave of both pairs,
;   one HADAMARD2_2D pass with dq interleaving, then a qdq interleave plus a
;   final HADAMARD stage using %6 on each pair.
;   Unlike HADAMARD4_2D there is no trailing SWAP in this variant.
342 %macro HADAMARD4_2D_SSE 5-6 sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
343 HADAMARD 0, sumsub, %1, %2, %5 ; 1st V row 0 + 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
344 HADAMARD 0, sumsub, %3, %4, %5 ; 1st V row 2 + 3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
345 SBUTTERFLY wd, %1, %2, %5 ; %1: m0 1+0 %2: m1 1+0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
346 SBUTTERFLY wd, %3, %4, %5 ; %3: m0 3+2 %4: m1 3+2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
347 HADAMARD2_2D %1, %3, %2, %4, %5, dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
348 SBUTTERFLY qdq, %1, %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
349 HADAMARD 0, %6, %1, %2, %5 ; 2nd H m1/m0 row 0+1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
350 SBUTTERFLY qdq, %3, %4, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
351 HADAMARD 0, %6, %3, %4, %5 ; 2nd H m1/m0 row 2+3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
352 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
353
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; HADAMARD8_2D r0..r7, tmp [, final_op]
;   %1..%8 = eight register numbers, %9 = scratch register number,
;   %10    = operation of the last pass (defaults to sumsub).
;   Three rounds of HADAMARD2_2D with wd, dq and qdq interleaving; only the
;   qdq round receives %10.
;   The closing SWAPs (%2<->%5, %4<->%7) are emitted for every %10 except
;   amax.
354 %macro HADAMARD8_2D 9-10 sumsub
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
355 HADAMARD2_2D %1, %2, %3, %4, %9, wd
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
356 HADAMARD2_2D %5, %6, %7, %8, %9, wd
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
357 HADAMARD2_2D %1, %3, %2, %4, %9, dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
358 HADAMARD2_2D %5, %7, %6, %8, %9, dq
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
359 HADAMARD2_2D %1, %5, %3, %7, %9, qdq, %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
360 HADAMARD2_2D %2, %6, %4, %8, %9, qdq, %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
361 %ifnidn %10, amax
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
362 SWAP %2, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
363 SWAP %4, %7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
364 %endif
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
365 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
366
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; SUMSUB2_AB a, b, out
;   Operands are passed as complete operand text (e.g. m0 or [mem]), not as
;   bare register numbers. Per 16-bit lane, with a = %1 and b = %2 on entry:
;     %1 = 2*a + b
;     %3 = a - 2*b
;   %2 is only read.
367 %macro SUMSUB2_AB 3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
368 mova %3, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
369 paddw %1, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
370 paddw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
371 psubw %3, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
372 psubw %3, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
373 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
374
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; SUMSUB2_BA a, b, tmp
;   Operands are register numbers (the macro adds the m prefix itself).
;   Per 16-bit lane, with a = m%1 and b = m%2 on entry:
;     m%1 = a + 2*b
;     m%2 = b - 2*a
;     m%3 = a   (scratch, overwritten)
375 %macro SUMSUB2_BA 3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
376 mova m%3, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
377 paddw m%1, m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
378 paddw m%1, m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
379 psubw m%2, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
380 psubw m%2, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
381 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
382
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; SUMSUBD2_AB a, b, save_b, save_a
;   Operands are complete operand text; %3 and %4 are only written by mova.
;   Per 16-bit lane, with a = %1 and b = %2 on entry (>> is arithmetic):
;     %1 = (a >> 1) - b
;     %2 = (b >> 1) + a
;     %3 = b, %4 = a   (copies of the inputs)
383 %macro SUMSUBD2_AB 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
384 mova %4, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
385 mova %3, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
386 psraw %2, 1
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
387 psraw %1, 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
388 paddw %2, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
389 psubw %1, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
390 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
391
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; DCT4_1D r0, r1, r2, r3, tmp
;   %1..%4 = register numbers holding the four inputs.
;   %5     = either a register number (scratch register m%5) or an address
;            expression; in the latter case [%5] is used as scratch memory
;            to spill m%2 so that m%2 can serve as the output of SUMSUB2_AB.
;   In the register variant the trailing "; m%5" on the SUMSUB_BADC line is
;   commented out, so that call receives four operands in both variants.
;   The closing SWAP renames the registers; its operand list differs
;   between the two variants because only the first one involves %5.
; SUMSUB_BADC and SUMSUB_BA are defined outside this excerpt.
392 %macro DCT4_1D 5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
393 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
394 SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
395 SUMSUB_BA m%3, m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
396 SUMSUB2_AB m%1, m%2, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
397 SWAP %1, %3, %4, %5, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
398 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
399 SUMSUB_BADC m%4, m%1, m%3, m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
400 SUMSUB_BA m%3, m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
401 mova [%5], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
402 SUMSUB2_AB m%1, [%5], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
403 SWAP %1, %3, %4, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
404 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
405 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
406
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; IDCT4_1D r0, r1, r2, r3, tmp [, tmp2]
;   %1..%4 = register numbers holding the four inputs.
;   If %5 is a number, m%5 and m%6 are used as scratch registers.
;   Otherwise %5 is an address expression and [%5] / [%5+16] are used as
;   scratch memory by SUMSUBD2_AB, while SUMSUB_BA / SUMSUB_BADC are called
;   without a scratch operand.
;   The final SWAP renames %1/%4/%3 and is emitted in both variants.
; SUMSUB_BA and SUMSUB_BADC are defined outside this excerpt.
407 %macro IDCT4_1D 5-6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
408 %ifnum %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
409 SUMSUBD2_AB m%2, m%4, m%6, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
410 SUMSUB_BA m%3, m%1, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
411 SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
412 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
413 SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
414 SUMSUB_BA m%3, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
415 SUMSUB_BADC m%4, m%3, m%2, m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
416 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
417 SWAP %1, %4, %3
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
418 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
419
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; LOAD_DIFF dst, tmp, zero_or_none, src_a, src_b
;   Loads bytes from %4 and %5 (via movh) and leaves their difference
;   (%4 - %5) as 16-bit words in %1; %2 is overwritten.
;   %3 == none: no extra register is needed. %1 is built as a | (b << 8)
;               and %2 as b | (b << 8) by byte interleaving, so the word
;               subtraction cancels the high byte and yields a - b.
;   otherwise : both inputs are byte-interleaved with %3 before the
;               subtraction (presumably %3 holds zero, giving a plain
;               zero-extension - verify against callers).
;   movh is defined outside this excerpt.
420 %macro LOAD_DIFF 5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
421 %ifidn %3, none
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
422 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
423 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
424 punpcklbw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
425 punpcklbw %2, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
426 psubw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
427 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
428 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
429 punpcklbw %1, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
430 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
431 punpcklbw %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
432 psubw %1, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
433 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
434 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
435
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; LOAD_DIFF8x4_SSE2 r0, r1, r2, r3, tmp, zero_or_none, ptr_a, ptr_b
;   Four LOAD_DIFF invocations, one per destination register m%1..m%4.
;   The destination register number is also used as the row index: row n is
;   read from [%7 + n*FENC_STRIDE] and [%8 + n*FDEC_STRIDE].
;   %5 is the LOAD_DIFF scratch register number; %6 is forwarded as m%6 in
;   the third LOAD_DIFF argument.
;   FENC_STRIDE and FDEC_STRIDE are defined outside this excerpt.
436 %macro LOAD_DIFF8x4_SSE2 8
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
437 LOAD_DIFF m%1, m%5, m%6, [%7+%1*FENC_STRIDE], [%8+%1*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
438 LOAD_DIFF m%2, m%5, m%6, [%7+%2*FENC_STRIDE], [%8+%2*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
439 LOAD_DIFF m%3, m%5, m%6, [%7+%3*FENC_STRIDE], [%8+%3*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
440 LOAD_DIFF m%4, m%5, m%6, [%7+%4*FENC_STRIDE], [%8+%4*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
441 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
442
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; LOAD_DIFF8x4_SSSE3 r0, r1, r2, r3, tmp, mul, ptr_a, ptr_b
;   Same argument layout and row addressing as LOAD_DIFF8x4_SSE2, but the
;   subtraction is done with pmaddubsw: for each row the bytes from
;   [%7 + n*FENC_STRIDE] and [%8 + n*FDEC_STRIDE] are interleaved and then
;   multiplied-and-added pairwise against m%6.
;   NOTE(review): m%6 presumably holds alternating +1/-1 bytes so that each
;   word becomes a - b; the constant is loaded by the caller - confirm.
;   The next destination register doubles as the temporary for the current
;   row (m%2 for row %1, ..., m%5 for row %4), hence the load ordering.
443 %macro LOAD_DIFF8x4_SSSE3 8 ; 4x dst, 1x tmp, 1x mul, 2x ptr
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
444 movh m%2, [%8+%1*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
445 movh m%1, [%7+%1*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
446 punpcklbw m%1, m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
447 movh m%3, [%8+%2*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
448 movh m%2, [%7+%2*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
449 punpcklbw m%2, m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
450 movh m%4, [%8+%3*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
451 movh m%3, [%7+%3*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
452 punpcklbw m%3, m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
453 movh m%5, [%8+%4*FDEC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
454 movh m%4, [%7+%4*FENC_STRIDE]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
455 punpcklbw m%4, m%5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
456 pmaddubsw m%1, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
457 pmaddubsw m%2, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
458 pmaddubsw m%3, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
459 pmaddubsw m%4, m%6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
460 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
461
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; STORE_DCT r0, r1, r2, r3, base, offset
;   Writes 64 bytes starting at [%5+%6]:
;     +0..+24  : low  8 bytes of m%1, m%2, m%3, m%4 (movq)
;     +32..+56 : high 8 bytes of m%1, m%2, m%3, m%4 (movhps)
;   %1..%4 are register numbers; %5 and %6 are address components.
462 %macro STORE_DCT 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
463 movq [%5+%6+ 0], m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
464 movq [%5+%6+ 8], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
465 movq [%5+%6+16], m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
466 movq [%5+%6+24], m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
467 movhps [%5+%6+32], m%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
468 movhps [%5+%6+40], m%2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
469 movhps [%5+%6+48], m%3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
470 movhps [%5+%6+56], m%4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
471 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
472
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; STORE_IDCT x0, x1, x2, x3
;   Operands are complete register names (no m prefix is added here).
;   Writes eight rows relative to r0, from r0-4*FDEC_STRIDE up to
;   r0+3*FDEC_STRIDE. Each operand supplies two consecutive rows: its high
;   half (movhps) goes to the first row and its low half (movh) to the next.
;   r0 is used implicitly as the base pointer; FDEC_STRIDE and movh are
;   defined outside this excerpt.
473 %macro STORE_IDCT 4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
474 movhps [r0-4*FDEC_STRIDE], %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
475 movh [r0-3*FDEC_STRIDE], %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
476 movhps [r0-2*FDEC_STRIDE], %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
477 movh [r0-1*FDEC_STRIDE], %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
478 movhps [r0+0*FDEC_STRIDE], %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
479 movh [r0+1*FDEC_STRIDE], %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
480 movhps [r0+2*FDEC_STRIDE], %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
481 movh [r0+3*FDEC_STRIDE], %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
482 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
483
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; LOAD_DIFF_8x4P: expand LOAD_DIFF (defined outside this section) once for
; each of four consecutive rows of two buffers.
;   %1-%4  register numbers passed as the first LOAD_DIFF argument (m%1..m%4)
;   %5,%6  register numbers passed alternately as the second LOAD_DIFF argument
;   %7     register number passed as the third LOAD_DIFF argument on every row
;          (not listed in the inline comment on the %macro line)
;   %8     first pointer,  default r0; rows at +0, +r1, +2*r1, +r4
;   %9     second pointer, default r2; rows at +0, +r3, +2*r3, +r5
;   %10    default 0; when nonzero, %8 and %9 are advanced by 4*r1 and 4*r3
; NOTE(review): r4 and r5 presumably hold 3*r1 and 3*r3 so that the fourth
; row continues the sequence - confirm that callers set them up.
484 %macro LOAD_DIFF_8x4P 7-10 r0,r2,0 ; 4x dest, 2x temp, 2x pointer, increment?
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
485 LOAD_DIFF m%1, m%5, m%7, [%8], [%9]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
486 LOAD_DIFF m%2, m%6, m%7, [%8+r1], [%9+r3]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
487 LOAD_DIFF m%3, m%5, m%7, [%8+2*r1], [%9+2*r3]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
488 LOAD_DIFF m%4, m%6, m%7, [%8+r4], [%9+r5]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
489 %if %10
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
490 lea %8, [%8+4*r1] ; step both pointers past the four rows just loaded
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
491 lea %9, [%9+4*r3]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
492 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
493 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
494
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
; DIFFx2: shift two word vectors right by 6, add a row of bytes to each, and
; pack both results into one register of unsigned-saturated bytes.
;   %1,%2  word vectors; each is arithmetically shifted right by 6 in place
;   %3     scratch register, overwritten twice
;   %4     register interleaved with the loaded bytes by punpcklbw
;          (presumably all-zero so the bytes are zero-extended to words - confirm)
;   %5,%6  source operands loaded with movh; %5 is added to %1, %6 to %2
;   %7     optional; accepted by the 6-7 parameter range but never referenced
; Result: %2 holds the packed bytes, with the %2 sums in its low half and the
; %1 sums in its high half. %1 is left holding its unpacked word sums.
; This macro does not write to memory; the caller stores %2.
495 %macro DIFFx2 6-7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
496 movh %3, %5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
497 punpcklbw %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
498 psraw %1, 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
499 paddsw %1, %3 ; signed-saturating add of the widened bytes
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
500 movh %3, %6 ; %3 is reused for the second row
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
501 punpcklbw %3, %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
502 psraw %2, 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
503 paddsw %2, %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
504 packuswb %2, %1 ; low half from %2, high half from %1
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
505 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
506
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; STORE_DIFF: shift a word vector right by 6, add the bytes currently at a
; destination, and write the unsigned-saturated byte result back to it.
;   %1  word vector; shifted, summed and packed in place (clobbered)
;   %2  scratch register
;   %3  register interleaved with the loaded bytes by punpcklbw
;       (presumably all-zero so the bytes are zero-extended to words - confirm)
;   %4  destination operand: read with movh first, written with movh last
; movh is defined outside this section; its transfer width presumably
; depends on the active register size - confirm against its definition.
507 %macro STORE_DIFF 4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
508 movh %2, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
509 punpcklbw %2, %3
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8804
diff changeset
510 psraw %1, 6
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
511 paddsw %1, %2 ; signed-saturating add of the widened bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
512 packuswb %1, %1 ; both halves of %1 receive the same packed bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
513 movh %4, %1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
514 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
515