annotate x86/x86inc.asm @ 10430:12c8175d6db5 libavcodec

simd add_hfyu_left_prediction 2.2x faster than C on conroe, 3.6x on penryn. 4-6% faster huffyuv decoding if using left or plane mode and yuv
author lorenm
date Sun, 18 Oct 2009 20:10:10 +0000
parents c08ca946c80a
children 669965580b72
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1 ;*****************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2 ;* x86inc.asm
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
3 ;*****************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
4 ;* Copyright (C) 2005-2008 Loren Merritt <lorenm@u.washington.edu>
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
5 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
6 ;* This file is part of FFmpeg.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
7 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
8 ;* FFmpeg is free software; you can redistribute it and/or
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
9 ;* modify it under the terms of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
10 ;* License as published by the Free Software Foundation; either
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
11 ;* version 2.1 of the License, or (at your option) any later version.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
12 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
13 ;* FFmpeg is distributed in the hope that it will be useful,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
16 ;* Lesser General Public License for more details.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
17 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
18 ;* You should have received a copy of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
19 ;* License along with FFmpeg; if not, write to the Free Software
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
20 ;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
21 ;*****************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
22
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
23 %ifdef ARCH_X86_64
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
24 %ifidn __OUTPUT_FORMAT__,win32
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
25 %define WIN64
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
26 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
27 %define UNIX64
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
28 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
29 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
30
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
31 ; FIXME: All of the 64bit asm functions that take a stride as an argument
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
32 ; via register, assume that the high dword of that register is filled with 0.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
33 ; This is true in practice (since we never do any 64bit arithmetic on strides,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
34 ; and x264's strides are all positive), but is not guaranteed by the ABI.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
35
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
36 ; Name of the .rodata section.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
37 ; Kludge: Something on OS X fails to align .rodata even given an align attribute,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
38 ; so use a different read-only section.
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
39 %macro SECTION_RODATA 0-1 16
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
40 %ifidn __OUTPUT_FORMAT__,macho64
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
41 SECTION .text align=%1
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
42 %elifidn __OUTPUT_FORMAT__,macho
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
43 SECTION .text align=%1
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
44 fakegot:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
45 %else
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
46 SECTION .rodata align=%1
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
47 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
48 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
49
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
50 ; PIC support macros.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
51 ; x86_64 can't fit 64bit address literals in most instruction types,
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
52 ; so shared objects (under the assumption that they might be anywhere
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
53 ; in memory) must use an address mode that does fit.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
54 ; So all accesses to global variables must use this macro, e.g.
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
55 ; mov eax, [foo GLOBAL]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
56 ; instead of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
57 ; mov eax, [foo]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
58 ;
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
59 ; x86_32 doesn't require PIC.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
60 ; Some distros prefer shared objects to be PIC, but nothing breaks if
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
61 ; the code contains a few textrels, so we'll skip that complexity.
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
62
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
63 %ifdef WIN64
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
64 %define PIC
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
65 %elifndef ARCH_X86_64
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
66 %undef PIC
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
67 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
68 %ifdef PIC
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
69 %define GLOBAL wrt rip
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
70 %else
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
71 %define GLOBAL
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
72 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
73
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
74 ; Macros to eliminate most code duplication between x86_32 and x86_64:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
75 ; Currently this works only for leaf functions which load all their arguments
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
76 ; into registers at the start, and make no other use of the stack. Luckily that
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
77 ; covers most of x264's asm.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
78
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
79 ; PROLOGUE:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
80 ; %1 = number of arguments. loads them from stack if needed.
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
81 ; %2 = number of registers used. pushes callee-saved regs if needed.
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
82 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
83 ; %4 = list of names to define to registers
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
84 ; PROLOGUE can also be invoked by adding the same options to cglobal
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
85
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
86 ; e.g.
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
87 ; cglobal foo, 2,3, dst, src, tmp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
88 ; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
89
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
90 ; TODO Some functions can use some args directly from the stack. If they're the
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
91 ; last args then you can just not declare them, but if they're in the middle
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
92 ; we need more flexible macro.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
93
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
94 ; RET:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
95 ; Pops anything that was pushed by PROLOGUE
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
96
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
97 ; REP_RET:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
98 ; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
99 ; which are slow when a normal ret follows a branch.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
100
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
101 ; registers:
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
102 ; rN and rNq are the native-size register holding function argument N
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
103 ; rNd, rNw, rNb are dword, word, and byte size
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
104 ; rNm is the original location of arg N (a register or on the stack), dword
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
105 ; rNmp is native size
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
106
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
107 %macro DECLARE_REG 6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
108 %define r%1q %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
109 %define r%1d %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
110 %define r%1w %4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
111 %define r%1b %5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
112 %define r%1m %6
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
113 %ifid %6 ; i.e. it's a register
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
114 %define r%1mp %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
115 %elifdef ARCH_X86_64 ; memory
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
116 %define r%1mp qword %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
117 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
118 %define r%1mp dword %6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
119 %endif
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
120 %define r%1 %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
121 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
122
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
123 %macro DECLARE_REG_SIZE 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
124 %define r%1q r%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
125 %define e%1q r%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
126 %define r%1d e%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
127 %define e%1d e%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
128 %define r%1w %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
129 %define e%1w %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
130 %define r%1b %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
131 %define e%1b %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
132 %ifndef ARCH_X86_64
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
133 %define r%1 e%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
134 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
135 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
136
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
137 DECLARE_REG_SIZE ax, al
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
138 DECLARE_REG_SIZE bx, bl
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
139 DECLARE_REG_SIZE cx, cl
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
140 DECLARE_REG_SIZE dx, dl
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
141 DECLARE_REG_SIZE si, sil
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
142 DECLARE_REG_SIZE di, dil
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
143 DECLARE_REG_SIZE bp, bpl
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
144
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
145 ; t# defines for when per-arch register allocation is more complex than just function arguments
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
146
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
147 %macro DECLARE_REG_TMP 1-*
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
148 %assign %%i 0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
149 %rep %0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
150 CAT_XDEFINE t, %%i, r%1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
151 %assign %%i %%i+1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
152 %rotate 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
153 %endrep
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
154 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
155
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
156 %macro DECLARE_REG_TMP_SIZE 0-*
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
157 %rep %0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
158 %define t%1q t%1 %+ q
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
159 %define t%1d t%1 %+ d
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
160 %define t%1w t%1 %+ w
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
161 %define t%1b t%1 %+ b
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
162 %rotate 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
163 %endrep
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
164 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
165
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
166 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
167
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
168 %ifdef ARCH_X86_64
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
169 %define gprsize 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
170 %else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
171 %define gprsize 4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
172 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
173
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
174 %macro PUSH 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
175 push %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
176 %assign stack_offset stack_offset+gprsize
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
177 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
178
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
179 %macro POP 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
180 pop %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
181 %assign stack_offset stack_offset-gprsize
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
182 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
183
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
184 %macro SUB 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
185 sub %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
186 %ifidn %1, rsp
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
187 %assign stack_offset stack_offset+(%2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
188 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
189 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
190
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
191 %macro ADD 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
192 add %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
193 %ifidn %1, rsp
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
194 %assign stack_offset stack_offset-(%2)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
195 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
196 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
197
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
198 %macro movifnidn 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
199 %ifnidn %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
200 mov %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
201 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
202 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
203
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
204 %macro movsxdifnidn 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
205 %ifnidn %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
206 movsxd %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
207 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
208 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
209
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
210 %macro ASSERT 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
211 %if (%1) == 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
212 %error assert failed
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
213 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
214 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
215
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
216 %macro DEFINE_ARGS 0-*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
217 %ifdef n_arg_names
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
218 %assign %%i 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
219 %rep n_arg_names
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
220 CAT_UNDEF arg_name %+ %%i, q
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
221 CAT_UNDEF arg_name %+ %%i, d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
222 CAT_UNDEF arg_name %+ %%i, w
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
223 CAT_UNDEF arg_name %+ %%i, b
10430
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10019
diff changeset
224 CAT_UNDEF arg_name %+ %%i, m
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
225 CAT_UNDEF arg_name, %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
226 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
227 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
228 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
229
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
230 %assign %%i 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
231 %rep %0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
232 %xdefine %1q r %+ %%i %+ q
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
233 %xdefine %1d r %+ %%i %+ d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
234 %xdefine %1w r %+ %%i %+ w
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
235 %xdefine %1b r %+ %%i %+ b
10430
12c8175d6db5 simd add_hfyu_left_prediction
lorenm
parents: 10019
diff changeset
236 %xdefine %1m r %+ %%i %+ m
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
237 CAT_XDEFINE arg_name, %%i, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
238 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
239 %rotate 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
240 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
241 %assign n_arg_names %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
242 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
243
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
244 %ifdef WIN64 ; Windows x64 ;=================================================
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
245
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
246 DECLARE_REG 0, rcx, ecx, cx, cl, ecx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
247 DECLARE_REG 1, rdx, edx, dx, dl, edx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
248 DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
249 DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
250 DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
251 DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
252 DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
253 %define r7m [rsp + stack_offset + 64]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
254 %define r8m [rsp + stack_offset + 72]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
255
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
256 %macro LOAD_IF_USED 2 ; reg_id, number_of_args
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
257 %if %1 < %2
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
258 mov r%1, [rsp + stack_offset + 8 + %1*8]
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
259 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
260 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
261
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
262 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
263 ASSERT %2 >= %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
264 %assign regs_used %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
265 ASSERT regs_used <= 7
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
266 %if %0 > 2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
267 %assign xmm_regs_used %3
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
268 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
269 %assign xmm_regs_used 0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
270 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
271 ASSERT xmm_regs_used <= 16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
272 %if regs_used > 4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
273 push r4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
274 push r5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
275 %assign stack_offset stack_offset+16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
276 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
277 %if xmm_regs_used > 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
278 sub rsp, (xmm_regs_used-6)*16+16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
279 %assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
280 %assign %%i xmm_regs_used
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
281 %rep (xmm_regs_used-6)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
282 %assign %%i %%i-1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
283 movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
284 %endrep
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
285 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
286 LOAD_IF_USED 4, %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
287 LOAD_IF_USED 5, %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
288 LOAD_IF_USED 6, %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
289 DEFINE_ARGS %4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
290 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
291
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
292 %macro RESTORE_XMM_INTERNAL 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
293 %if xmm_regs_used > 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
294 %assign %%i xmm_regs_used
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
295 %rep (xmm_regs_used-6)
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
296 %assign %%i %%i-1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
297 movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8]
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
298 %endrep
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
299 add %1, (xmm_regs_used-6)*16+16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
300 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
301 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
302
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
303 %macro RESTORE_XMM 1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
304 RESTORE_XMM_INTERNAL %1
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
305 %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
306 %assign xmm_regs_used 0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
307 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
308
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
309 %macro RET 0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
310 RESTORE_XMM_INTERNAL rsp
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
311 %if regs_used > 4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
312 pop r5
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
313 pop r4
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
314 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
315 ret
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
316 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
317
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
318 %macro REP_RET 0
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
319 %if regs_used > 4 || xmm_regs_used > 6
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
320 RET
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
321 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
322 rep ret
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
323 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
324 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
325
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
326 %elifdef ARCH_X86_64 ; *nix x64 ;=============================================
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
327
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
328 DECLARE_REG 0, rdi, edi, di, dil, edi
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
329 DECLARE_REG 1, rsi, esi, si, sil, esi
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
330 DECLARE_REG 2, rdx, edx, dx, dl, edx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
331 DECLARE_REG 3, rcx, ecx, cx, cl, ecx
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
332 DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
333 DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
334 DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
335 %define r7m [rsp + stack_offset + 16]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
336 %define r8m [rsp + stack_offset + 24]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
337
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
338 %macro LOAD_IF_USED 2 ; reg_id, number_of_args
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
339 %if %1 < %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
340 mov r%1, [rsp - 40 + %1*8]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
341 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
342 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
343
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
344 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
345 ASSERT %2 >= %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
346 ASSERT %2 <= 7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
347 LOAD_IF_USED 6, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
348 DEFINE_ARGS %4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
349 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
350
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
351 %macro RET 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
352 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
353 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
354
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
355 %macro REP_RET 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
356 rep ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
357 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
358
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
359 %else ; X86_32 ;==============================================================
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
360
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
361 DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
362 DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
363 DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
364 DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
365 DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
366 DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
367 DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
368 %define r7m [esp + stack_offset + 32]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
369 %define r8m [esp + stack_offset + 36]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
370 %define rsp esp
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
371
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
372 %macro PUSH_IF_USED 1 ; reg_id
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
373 %if %1 < regs_used
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
374 push r%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
375 %assign stack_offset stack_offset+4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
376 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
377 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
378
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
379 %macro POP_IF_USED 1 ; reg_id
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
380 %if %1 < regs_used
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
381 pop r%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
382 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
383 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
384
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
385 %macro LOAD_IF_USED 2 ; reg_id, number_of_args
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
386 %if %1 < %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
387 mov r%1, [esp + stack_offset + 4 + %1*4]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
388 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
389 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
390
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
391 %macro PROLOGUE 2-4+ ; #args, #regs, arg_names...
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
392 ASSERT %2 >= %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
393 %assign regs_used %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
394 ASSERT regs_used <= 7
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
395 PUSH_IF_USED 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
396 PUSH_IF_USED 4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
397 PUSH_IF_USED 5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
398 PUSH_IF_USED 6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
399 LOAD_IF_USED 0, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
400 LOAD_IF_USED 1, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
401 LOAD_IF_USED 2, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
402 LOAD_IF_USED 3, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
403 LOAD_IF_USED 4, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
404 LOAD_IF_USED 5, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
405 LOAD_IF_USED 6, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
406 DEFINE_ARGS %4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
407 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
408
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
409 %macro RET 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
410 POP_IF_USED 6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
411 POP_IF_USED 5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
412 POP_IF_USED 4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
413 POP_IF_USED 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
414 ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
415 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
416
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
417 %macro REP_RET 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
418 %if regs_used > 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
419 RET
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
420 %else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
421 rep ret
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
422 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
423 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
424
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
425 %endif ;======================================================================
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
426
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
427
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
428
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
429 ;=============================================================================
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
430 ; arch-independent part
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
431 ;=============================================================================
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
432
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
433 %assign function_align 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
434
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
435 ; Symbol prefix for C linkage
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
436 %macro cglobal 1-2+
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
437 %xdefine %1 ff_%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
438 %ifdef PREFIX
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
439 %xdefine %1 _ %+ %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
440 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
441 %ifidn __OUTPUT_FORMAT__,elf
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
442 global %1:function hidden
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
443 %else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
444 global %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
445 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
446 align function_align
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
447 %1:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
448 RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
449 %assign stack_offset 0
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
450 %if %0 > 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
451 PROLOGUE %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
452 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
453 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
454
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
455 %macro cextern 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
456 %ifdef PREFIX
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
457 %xdefine %1 _%1
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
458 %endif
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
459 extern %1
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
460 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
461
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
462 ; This is needed for ELF, otherwise the GNU linker assumes the stack is
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
463 ; executable by default.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
464 %ifidn __OUTPUT_FORMAT__,elf
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
465 SECTION .note.GNU-stack noalloc noexec nowrite progbits
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
466 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
467
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
468 %assign FENC_STRIDE 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
469 %assign FDEC_STRIDE 32
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
470
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
471 ; merge mmx and sse*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
472
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
473 %macro CAT_XDEFINE 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
474 %xdefine %1%2 %3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
475 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
476
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
477 %macro CAT_UNDEF 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
478 %undef %1%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
479 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
480
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
481 %macro INIT_MMX 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
482 %define RESET_MM_PERMUTATION INIT_MMX
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
483 %define mmsize 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
484 %define num_mmregs 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
485 %define mova movq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
486 %define movu movq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
487 %define movh movd
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
488 %define movnt movntq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
489 %assign %%i 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
490 %rep 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
491 CAT_XDEFINE m, %%i, mm %+ %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
492 CAT_XDEFINE nmm, %%i, %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
493 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
494 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
495 %rep 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
496 CAT_UNDEF m, %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
497 CAT_UNDEF nmm, %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
498 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
499 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
500 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
501
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
502 %macro INIT_XMM 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
503 %define RESET_MM_PERMUTATION INIT_XMM
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
504 %define mmsize 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
505 %define num_mmregs 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
506 %ifdef ARCH_X86_64
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
507 %define num_mmregs 16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
508 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
509 %define mova movdqa
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
510 %define movu movdqu
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
511 %define movh movq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
512 %define movnt movntdq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
513 %assign %%i 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
514 %rep num_mmregs
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
515 CAT_XDEFINE m, %%i, xmm %+ %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
516 CAT_XDEFINE nxmm, %%i, %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
517 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
518 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
519 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
520
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
521 INIT_MMX
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
522
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
523 ; I often want to use macros that permute their arguments. e.g. there's no
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
524 ; efficient way to implement butterfly or transpose or dct without swapping some
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
525 ; arguments.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
526 ;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
527 ; I would like to not have to manually keep track of the permutations:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
528 ; If I insert a permutation in the middle of a function, it should automatically
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
529 ; change everything that follows. For more complex macros I may also have multiple
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
530 ; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
531 ;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
532 ; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
533 ; permutes its arguments. It's equivalent to exchanging the contents of the
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
534 ; registers, except that this way you exchange the register names instead, so it
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
535 ; doesn't cost any cycles.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
536
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
537 %macro PERMUTE 2-* ; takes a list of pairs to swap
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
538 %rep %0/2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
539 %xdefine tmp%2 m%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
540 %xdefine ntmp%2 nm%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
541 %rotate 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
542 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
543 %rep %0/2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
544 %xdefine m%1 tmp%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
545 %xdefine nm%1 ntmp%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
546 %undef tmp%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
547 %undef ntmp%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
548 %rotate 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
549 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
550 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
551
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
552 %macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
553 %rep %0-1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
554 %ifdef m%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
555 %xdefine tmp m%1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
556 %xdefine m%1 m%2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
557 %xdefine m%2 tmp
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
558 CAT_XDEFINE n, m%1, %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
559 CAT_XDEFINE n, m%2, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
560 %else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
561 ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
562 ; Be careful using this mode in nested macros though, as in some cases there may be
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
563 ; other copies of m# that have already been dereferenced and don't get updated correctly.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
564 %xdefine %%n1 n %+ %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
565 %xdefine %%n2 n %+ %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
566 %xdefine tmp m %+ %%n1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
567 CAT_XDEFINE m, %%n1, m %+ %%n2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
568 CAT_XDEFINE m, %%n2, tmp
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
569 CAT_XDEFINE n, m %+ %%n1, %%n1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
570 CAT_XDEFINE n, m %+ %%n2, %%n2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
571 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
572 %undef tmp
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
573 %rotate 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
574 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
575 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
576
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
577 %macro SAVE_MM_PERMUTATION 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
578 %assign %%i 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
579 %rep num_mmregs
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
580 CAT_XDEFINE %1_m, %%i, m %+ %%i
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
581 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
582 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
583 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
584
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
585 %macro LOAD_MM_PERMUTATION 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
586 %assign %%i 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
587 %rep num_mmregs
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
588 CAT_XDEFINE m, %%i, %1_m %+ %%i
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
589 CAT_XDEFINE n, m %+ %%i, %%i
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
590 %assign %%i %%i+1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
591 %endrep
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
592 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
593
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
594 %macro call 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
595 call %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
596 %ifdef %1_m0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
597 LOAD_MM_PERMUTATION %1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
598 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
599 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
600
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
601 ;Substitutions that reduce instruction size but are functionally equivalent
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
602 %define movdqa movaps
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
603 %define movdqu movups
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
604
10019
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
605 %macro add 2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
606 %ifnum %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
607 %if %2==128
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
608 sub %1, -128
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
609 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
610 add %1, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
611 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
612 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
613 add %1, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
614 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
615 %endmacro
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
616
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
617 %macro sub 2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
618 %ifnum %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
619 %if %2==128
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
620 add %1, -128
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
621 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
622 sub %1, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
623 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
624 %else
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
625 sub %1, %2
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
626 %endif
c08ca946c80a Update x264 asm code to latest to add support for 64-bit Windows.
darkshikari
parents: 8430
diff changeset
627 %endmacro