annotate x86/x86util.asm @ 9473:e38284cd69dc libavcodec

Use memcpy instead of the very inefficient bytecopy where both are correct (i.e. no overlap of src and dst is possible).
author reimar
date Fri, 17 Apr 2009 17:20:48 +0000
parents ba83a0c57e9f
children c08ca946c80a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
1 ;*****************************************************************************
8804
ba83a0c57e9f Fix wrong file name in header, noticed by David DeHaven, dave sagetv com.
diego
parents: 8510
diff changeset
2 ;* x86util.asm
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
3 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
4 ;* Copyright (C) 2008 Loren Merritt <lorenm@u.washington.edu>
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
5 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
6 ;* This program is free software; you can redistribute it and/or modify
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
7 ;* it under the terms of the GNU General Public License as published by
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
8 ;* the Free Software Foundation; either version 2 of the License, or
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
9 ;* (at your option) any later version.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
10 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
11 ;* This program is distributed in the hope that it will be useful,
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
14 ;* GNU General Public License for more details.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
15 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
16 ;* You should have received a copy of the GNU General Public License
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
17 ;* along with this program; if not, write to the Free Software
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
19 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
20
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
21 %macro SBUTTERFLY 4 ; %1 = punpck size suffix (wd, dq, qdq), %2 = a, %3 = b, %4 = tmp (register numbers)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
22 mova m%4, m%2 ; tmp = copy of a, needed for the high-half unpack below
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
23 punpckl%1 m%2, m%3 ; a = interleave of the low halves of a and b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
24 punpckh%1 m%4, m%3 ; tmp = interleave of the high halves of the original a and b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
25 SWAP %3, %4 ; SWAP is defined outside this file; presumably it renames registers so %3 names the high-half result and %4 the stale b -- TODO confirm
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
26 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
27
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
28 %macro TRANSPOSE4x4W 5 ; %1-%4 = row register numbers, %5 = tmp register number; by its name a 4x4 transpose of 16-bit words
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
29 SBUTTERFLY wd, %1, %2, %5 ; round 1: interleave words of rows 1/2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
30 SBUTTERFLY wd, %3, %4, %5 ; round 1: interleave words of rows 3/4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
31 SBUTTERFLY dq, %1, %3, %5 ; round 2: interleave dwords of the round-1 results
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
32 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
33 SWAP %2, %3 ; presumably puts the middle two rows back in natural order -- TODO confirm against SWAP's definition
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
34 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
35
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
36 %macro TRANSPOSE2x4x4W 5 ; %1-%4 = row register numbers, %5 = tmp register number; by its name two 4x4 word transposes at once -- NOTE(review): confirm lane layout with callers
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
37 SBUTTERFLY wd, %1, %2, %5 ; round 1: word interleave
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
38 SBUTTERFLY wd, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
39 SBUTTERFLY dq, %1, %3, %5 ; round 2: dword interleave
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
40 SBUTTERFLY dq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
41 SBUTTERFLY qdq, %1, %2, %5 ; round 3: qword interleave (needs 128-bit registers); no trailing SWAP here, unlike TRANSPOSE4x4W
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
42 SBUTTERFLY qdq, %3, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
43 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
44
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
45 %macro TRANSPOSE4x4D 5 ; %1-%4 = row register numbers, %5 = tmp register number; by its name a 4x4 transpose of 32-bit dwords
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
46 SBUTTERFLY dq, %1, %2, %5 ; round 1: interleave dwords of rows 1/2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
47 SBUTTERFLY dq, %3, %4, %5 ; round 1: interleave dwords of rows 3/4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
48 SBUTTERFLY qdq, %1, %3, %5 ; round 2: interleave qwords of the round-1 results
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
49 SBUTTERFLY qdq, %2, %4, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
50 SWAP %2, %3 ; presumably puts the middle two rows back in natural order -- TODO confirm against SWAP's definition
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
51 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
52
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
53 %macro TRANSPOSE8x8W 9-11 ; %1-%8 = row register numbers; %9 = tmp register number (x86-64) or memory spill slot (x86-32); %10 = second spill slot (x86-32 only); %11 = optional flag, see below
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
54 %ifdef ARCH_X86_64
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
55 SBUTTERFLY wd, %1, %2, %9 ; round 1: word interleave, m%9 is a free scratch register
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
56 SBUTTERFLY wd, %3, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
57 SBUTTERFLY wd, %5, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
58 SBUTTERFLY wd, %7, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
59 SBUTTERFLY dq, %1, %3, %9 ; round 2: dword interleave
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
60 SBUTTERFLY dq, %2, %4, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
61 SBUTTERFLY dq, %5, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
62 SBUTTERFLY dq, %6, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
63 SBUTTERFLY qdq, %1, %5, %9 ; round 3: qword interleave
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
64 SBUTTERFLY qdq, %2, %6, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
65 SBUTTERFLY qdq, %3, %7, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
66 SBUTTERFLY qdq, %4, %8, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
67 SWAP %2, %5 ; final register renames; presumably restore natural row order -- TODO confirm against SWAP's definition
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
68 SWAP %4, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
69 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
70 ; in: m0..m7, unless %11 in which case m6 is in %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
71 ; out: m0..m7, unless %11 in which case m4 is in %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
72 ; spills into %9 and %10 (both are used directly as movdqa memory operands)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
73 %if %0<11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
74 movdqa %9, m%7 ; no %11: spill the 7th row to %9 so m%7 can serve as scratch
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
75 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
76 SBUTTERFLY wd, %1, %2, %7 ; round 1: word interleave, using m%7 as tmp
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
77 movdqa %10, m%2 ; spill one result to %10 to free a scratch register
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
78 movdqa m%7, %9 ; reload the 7th row from %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
79 SBUTTERFLY wd, %3, %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
80 SBUTTERFLY wd, %5, %6, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
81 SBUTTERFLY wd, %7, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
82 SBUTTERFLY dq, %1, %3, %2 ; round 2: dword interleave
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
83 movdqa %9, m%3 ; spill to %9, then bring back the value parked in %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
84 movdqa m%2, %10
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
85 SBUTTERFLY dq, %2, %4, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
86 SBUTTERFLY dq, %5, %7, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
87 SBUTTERFLY dq, %6, %8, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
88 SBUTTERFLY qdq, %1, %5, %3 ; round 3: qword interleave
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
89 SBUTTERFLY qdq, %2, %6, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
90 movdqa %10, m%2 ; park a finished row in %10 and reload the value spilled to %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
91 movdqa m%3, %9
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
92 SBUTTERFLY qdq, %3, %7, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
93 SBUTTERFLY qdq, %4, %8, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
94 SWAP %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
95 SWAP %4, %7
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
96 %if %0<11
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
97 movdqa m%5, %10 ; no %11: reload the row parked in %10; with %11 it is left in %10 as the "out:" note above states
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
98 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
99 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
100 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
101
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
102 %macro ABS1_MMX 2 ; a, tmp -- replaces each signed word of a with max(a, -a); tmp is clobbered
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
103 pxor %2, %2 ; tmp = 0
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
104 psubw %2, %1 ; tmp = -a (wrapping subtract, so -32768 stays -32768)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
105 pmaxsw %1, %2 ; a = max(a, -a)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
106 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
107
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
108 %macro ABS2_MMX 4 ; a, b, tmp0, tmp1 -- same as ABS1_MMX on a and b, with the two chains interleaved; both temps are clobbered
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
109 pxor %3, %3 ; tmp0 = 0
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
110 pxor %4, %4 ; tmp1 = 0
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
111 psubw %3, %1 ; tmp0 = -a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
112 psubw %4, %2 ; tmp1 = -b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
113 pmaxsw %1, %3 ; a = max(a, -a)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
114 pmaxsw %2, %4 ; b = max(b, -b)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
115 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
116
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
117 %macro ABS1_SSSE3 2 ; a, tmp -- same argument list as ABS1_MMX; tmp (%2) is not used here
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
118 pabsw %1, %1 ; a = |a| per signed word, in one instruction
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
119 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
120
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
121 %macro ABS2_SSSE3 4 ; a, b, tmp0, tmp1 -- same argument list as ABS2_MMX; the temps (%3, %4) are not used here
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
122 pabsw %1, %1 ; a = |a| per signed word
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
123 pabsw %2, %2 ; b = |b| per signed word
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
124 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
125
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
; ABS1/ABS2 default to the MMX implementations. NOTE(review): presumably including files re-%define them to the SSSE3 variants where available -- confirm.
126 %define ABS1 ABS1_MMX
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
127 %define ABS2 ABS2_MMX
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
128
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
129 %macro ABS4 6 ; a, b, c, d, tmp0, tmp1 -- absolute value of four registers via two ABS2 expansions
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
130 ABS2 %1, %2, %5, %6 ; first pair; uses whichever implementation ABS2 is currently %define'd to
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
131 ABS2 %3, %4, %5, %6 ; second pair, reusing the same two temps
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
132 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
133
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
134 %macro SPLATB_MMX 3 ; %1 = dest register, %2 = address of the byte to broadcast, %3 = not used here (SPLATB_SSSE3 takes a shuffle mask in that position)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
135 movd %1, [%2-3] ;to avoid crossing a cacheline; the wanted byte ends up as the top byte of the loaded dword
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
136 punpcklbw %1, %1 ; duplicate every byte, so word 3 now holds the wanted byte twice
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
137 %if mmsize==16
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
138 pshuflw %1, %1, 0xff ; copy word 3 into all four words of the low qword
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
139 punpcklqdq %1, %1 ; copy the low qword into the high qword
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
140 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
141 pshufw %1, %1, 0xff ; copy word 3 into all four words of the 64-bit register
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
142 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
143 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
144
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
145 %macro SPLATB_SSSE3 3 ; %1 = dest register, %2 = address of the byte to broadcast, %3 = pshufb shuffle mask supplied by the caller
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
146 movd %1, [%2-3] ; same 4-byte load as SPLATB_MMX: the wanted byte is byte 3 of the register
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
147 pshufb %1, %3 ; byte shuffle by the mask; NOTE(review): presumably every mask byte is 3 so byte 3 is broadcast -- confirm with callers
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
148 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
149
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
150 %macro PALIGNR_MMX 4 ; %1 = dest and high source, %2 = low source, %3 = shift in bytes, %4 = tmp; shift/OR replacement for palignr %1, %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
151 %ifnidn %4, %2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
152 mova %4, %2 ; work on a copy of the low source; skipped when the caller passes %2 itself as tmp, in which case %2 is destroyed
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
153 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
154 %if mmsize == 8
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
155 psllq %1, (8-%3)*8 ; 64-bit registers: shift counts are in bits; move the high source up by (8 - shift) bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
156 psrlq %4, %3*8 ; move the low source down by shift bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
157 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
158 pslldq %1, 16-%3 ; 128-bit registers: byte-granular shifts; move the high source up by (16 - shift) bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
159 psrldq %4, %3 ; move the low source down by shift bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
160 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
161 por %1, %4 ; merge the two shifted parts into the destination
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
162 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
163
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
164 %macro PALIGNR_SSSE3 4 ; same argument list as PALIGNR_MMX (%1 = dest/high source, %2 = low source, %3 = shift in bytes); tmp (%4) is not used here
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
165 palignr %1, %2, %3 ; native instruction, no temporary needed
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
166 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
167
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
168 %macro SUMSUB_BA 2 ; in: %1 = a, %2 = b; out: %1 = a + b, %2 = b - a (per 16-bit word, wrapping); no temporary needed
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
169 paddw %1, %2 ; %1 = a + b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
170 paddw %2, %2 ; %2 = 2b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
171 psubw %2, %1 ; %2 = 2b - (a + b) = b - a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
172 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
173
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
174 %macro SUMSUB_BADC 4 ; two SUMSUB_BA butterflies interleaved: out %1 = a + b, %2 = b - a, %3 = c + d, %4 = d - c (in: %1 = a, %2 = b, %3 = c, %4 = d)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
175 paddw %1, %2 ; %1 = a + b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
176 paddw %3, %4 ; %3 = c + d
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
177 paddw %2, %2 ; %2 = 2b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
178 paddw %4, %4 ; %4 = 2d
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
179 psubw %2, %1 ; %2 = b - a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
180 psubw %4, %3 ; %4 = d - c
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
181 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
182
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
183 %macro HADAMARD8_1D 8 ; %1-%8 = the eight registers, transformed in place by three stages of sum/difference butterflies; output ordering/signs follow SUMSUB_BADC
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
184 SUMSUB_BADC %1, %5, %2, %6 ; stage 1: pairs four apart (1,5) (2,6)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
185 SUMSUB_BADC %3, %7, %4, %8 ; stage 1: (3,7) (4,8)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
186 SUMSUB_BADC %1, %3, %2, %4 ; stage 2: pairs two apart (1,3) (2,4)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
187 SUMSUB_BADC %5, %7, %6, %8 ; stage 2: (5,7) (6,8)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
188 SUMSUB_BADC %1, %2, %3, %4 ; stage 3: adjacent pairs (1,2) (3,4)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
189 SUMSUB_BADC %5, %6, %7, %8 ; stage 3: (5,6) (7,8)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
190 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
191
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
192 %macro SUMSUB2_AB 3 ; in: %1 = a, %2 = b; out: %1 = 2a + b, %3 = a - 2b (per 16-bit word, wrapping); %2 is left unchanged
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
193 mova %3, %1 ; %3 = a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
194 paddw %1, %1 ; %1 = 2a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
195 paddw %1, %2 ; %1 = 2a + b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
196 psubw %3, %2 ; %3 = a - b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
197 psubw %3, %2 ; %3 = a - 2b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
198 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
199
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
200 %macro SUMSUBD2_AB 4 ; in: %1 = a, %2 = b; out: %1 = a + (b>>1), %4 = (a>>1) - b; side effects: %2 = b>>1, %3 = copy of b (shifts are arithmetic, per 16-bit word)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
201 mova %4, %1 ; %4 = a
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
202 mova %3, %2 ; %3 = b, kept unshifted for the subtraction
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
203 psraw %2, 1 ; %2 = b >> 1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
204 psraw %4, 1 ; %4 = a >> 1
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
205 paddw %1, %2 ; %1 = a + (b >> 1)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
206 psubw %4, %3 ; %4 = (a >> 1) - b
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
207 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
208
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
209 %macro LOAD_DIFF 5 ; %1 = dest, %2 = tmp, %3 = register to unpack against or the literal "none", %4 / %5 = memory operands; out: %1 = words of (bytes at %4) - (bytes at %5)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
210 %ifidn %3, none
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
211 movh %1, %4 ; no zero register available: load both byte rows (movh is defined outside this file)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
212 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
213 punpcklbw %1, %2 ; each word of %1 = x + 256*y (x from %4 in the low byte, y from %5 in the high byte)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
214 punpcklbw %2, %2 ; each word of %2 = y + 256*y
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
215 psubw %1, %2 ; the 256*y terms cancel, leaving x - y as a signed word
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
216 %else
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
217 movh %1, %4
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
218 punpcklbw %1, %3 ; widen bytes to words; assumes %3 is all zeros -- TODO confirm with callers
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
219 movh %2, %5
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
220 punpcklbw %2, %3
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
221 psubw %1, %2 ; %1 = x - y per word
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
222 %endif
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
223 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
224
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
225 %macro LOAD_DIFF_8x4P 6-8 r0,r2 ; 4x dest, 2x temp, 2x pointer (%7 / %8 default to r0 / r2); four LOAD_DIFF rows using the "none" (no zero register) path
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
226 LOAD_DIFF %1, %5, none, [%7], [%8] ; row 0
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
227 LOAD_DIFF %2, %6, none, [%7+r1], [%8+r3] ; row 1: r1 / r3 are used as the row offsets of the two pointers
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
228 LOAD_DIFF %3, %5, none, [%7+2*r1], [%8+2*r3] ; row 2, reusing the first temp
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
229 LOAD_DIFF %4, %6, none, [%7+r4], [%8+r5] ; row 3: NOTE(review): presumably the caller preloads r4 = 3*r1 and r5 = 3*r3 -- confirm
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
230 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
231
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
232 %macro STORE_DIFF 4 ; %1 = signed word residual (clobbered), %2 = tmp, %3 = register to unpack against, %4 = memory operand that is read, updated and written back
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
233 psraw %1, 6 ; arithmetic shift right by 6: scale the residual down by 64
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
234 movh %2, %4 ; load the existing bytes (movh is defined outside this file)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
235 punpcklbw %2, %3 ; widen bytes to words; assumes %3 is all zeros -- TODO confirm with callers
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
236 paddsw %1, %2 ; add with signed saturation
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
237 packuswb %1, %1 ; pack back to bytes with unsigned saturation (clamps to 0..255)
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
238 movh %4, %1 ; store the result over the original bytes
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
239 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
240