annotate mmx.h @ 19619:a83e5b8d2e63

Patch from Karolina Lindqvist <karolina.lindqvist@kramnet.se> "There is a bug in the zoran -vo zr driver, that makes the output garbled always. It also probably affects the zrmjpeg filter. This patch takes care of the problem." Patch tested and OK. And 10l to me, because this bug probably has existed for a looong time.
author rik
date Fri, 01 Sep 2006 18:49:40 +0000
parents f03a8d54e5f9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2509
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
1 /* mmx.h
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
2
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
3 MultiMedia eXtensions GCC interface library for IA32.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
4
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
5 To use this library, simply include this header file
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
6 and compile with GCC. You MUST have inlining enabled
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
7 in order for mmx_ok() to work; this can be done by
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
8 simply using -O on the GCC command line.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
9
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
10 Compiling with -DMMX_TRACE will cause detailed trace
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
11 output to be sent to stderr for each mmx operation.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
12 This adds lots of code, and obviously slows execution to
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
13 a crawl, but can be very useful for debugging.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
14
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
15 THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
17 LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
18 AND FITNESS FOR ANY PARTICULAR PURPOSE.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
19
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
20 1997-99 by H. Dietz and R. Fisher
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
21
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
22 Notes:
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
23 It appears that the latest gas has the pand problem fixed, therefore
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
24 I'll undefine BROKEN_PAND by default.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
25 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
26
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
27 #ifndef _MMX_H
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
28 #define _MMX_H
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
29
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
30
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
31 /* Warning: at this writing, the version of GAS packaged
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
32 with most Linux distributions does not handle the
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
33 parallel AND operation mnemonic correctly. If the
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
34 symbol BROKEN_PAND is defined, a slower alternative
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
35 coding will be used. If execution of mmxtest results
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
36 in an illegal instruction fault, define this symbol.
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
37 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
38 #undef BROKEN_PAND
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
39
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
40
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	A value that fits in one MMX register, viewable as any of the packed
	layouts below.  Long long constants MUST carry an LL suffix and
	unsigned long long constants a ULL suffix, otherwise the compiler
	may truncate them.
*/
typedef	union {
	long long		q;	/* one signed quadword (64-bit) */
	unsigned long long	uq;	/* one unsigned quadword */
	int			d[2];	/* two signed doublewords (32-bit) */
	unsigned int		ud[2];	/* two unsigned doublewords */
	short			w[4];	/* four signed words (16-bit) */
	unsigned short		uw[4];	/* four unsigned words */
	char			b[8];	/* eight bytes (8-bit) */
	unsigned char		ub[8];	/* eight unsigned bytes */
	float			s[2];	/* two single-precision (32-bit) floats */
} __attribute__ ((aligned (8))) mmx_t;	/* kept on an 8-byte (64-bit) boundary */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
57
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
58
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
59
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
#if defined(__i386__) || defined(__x86_64__)

/*	Run CPUID for `leaf' and store EAX/EBX/ECX/EDX through a/b/c/d.
	On PIC i386 builds EBX may be reserved by the compiler, so it is
	swapped into a scratch register around the instruction instead of
	being named as an output.
*/
static inline void
mm_support_cpuid(unsigned int leaf, unsigned int *a, unsigned int *b,
		 unsigned int *c, unsigned int *d)
{
#if defined(__i386__) && defined(__PIC__)
	__asm__ __volatile__ (
		"xchgl %%ebx, %1\n\t"
		"cpuid\n\t"
		"xchgl %%ebx, %1"
		: "=a" (*a), "=&r" (*b), "=c" (*c), "=d" (*d)
		: "0" (leaf));
#else
	__asm__ __volatile__ (
		"cpuid"
		: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
		: "0" (leaf));
#endif
}

#if defined(__i386__)
/*	Return non-zero if the CPUID instruction exists, i.e. if the ID bit
	(0x200000) of EFLAGS can be toggled by software.
*/
static inline int
mm_support_has_cpuid(void)
{
	unsigned int toggled, original;

	__asm__ __volatile__ (
		/* Copy EFLAGS into both operands */
		"pushfl\n\t"
		"popl %0\n\t"
		"movl %0, %1\n\t"
		/* Flip the ID bit in one copy and write it back */
		"xorl $0x200000, %0\n\t"
		"pushl %0\n\t"
		"popfl\n\t"
		/* Read back the (hopefully modified) EFLAGS */
		"pushfl\n\t"
		"popl %0"
		: "=&r" (toggled), "=&r" (original)
		: /* no input */
		: "cc");

	return ((toggled ^ original) & 0x200000) != 0;
}
#endif

#endif /* __i386__ || __x86_64__ */

/*	Function to test if multimedia instructions are supported...

	Returns 1 if MMX instructions are supported,
		3 if Cyrix MMX and Extended MMX instructions are supported
		5 if AMD MMX and 3DNow! instructions are supported
		0 if hardware does not support any of these

	Unlike the single-asm-block version this replaces, the function
	uses no assembler labels (so it can be inlined any number of
	times), reads the extended feature flags from EDX on both the AMD
	and the Cyrix path, compares the highest extended CPUID level as an
	unsigned value, and applies the standard CPUID level 1 MMX test to
	vendors it does not recognise.  It is `static inline' so that it
	works with or without optimisation.  On non-x86 targets it
	returns 0.
*/
static inline int
mm_support(void)
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned int max_std, max_ext;
	unsigned int eax, ebx, ecx, edx;
	unsigned int ext_flag = 0;	/* extra feature bit to look for in
					   CPUID/80000001 EDX; 0 = use the
					   standard level 1 test instead */
	int ext_rval = 0;		/* value returned when ext_flag is set */

#if defined(__i386__)
	/* Only 32-bit CPUs can lack CPUID */
	if (!mm_support_has_cpuid())
		return 0;
#endif

	/* Standard level 0: highest standard level in EAX and the vendor
	   string in EBX, EDX, ECX */
	mm_support_cpuid(0, &max_std, &ebx, &ecx, &edx);

	if (ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163) {
		/* "AuthenticAMD": use the extended features only if level
		   80000001 really exists */
		mm_support_cpuid(0x80000000u, &max_ext, &ebx, &ecx, &edx);
		if (max_ext >= 0x80000001u) {
			ext_flag = 0x80000000u;	/* 3DNow! */
			ext_rval = 5;
		}
	} else if (ebx == 0x69727943 && edx == 0x736e4978
		   && ecx == 0x64616574) {
		/* "CyrixInstead": the value of CPUID/80000001 for the 6x86MX
		   is undefined according to the Cyrix CPU Detection Guide
		   (Preliminary Rev. 1.01 table 1), and the only CPU which
		   supports standard level 2 is also the only one which
		   supports extended levels, so key on that. */
		if (max_std == 2) {
			ext_flag = 0x01000000u;	/* Extended MMX */
			ext_rval = 3;
		}
	}

	if (ext_flag != 0) {
		mm_support_cpuid(0x80000001u, &eax, &ebx, &ecx, &edx);
		if ((edx & 0x00800000u) == 0)
			return 0;		/* MMX not supported */
		return (edx & ext_flag) ? ext_rval : 1;
	}

	/* Intel, unknown vendors, and AMD/Cyrix parts without extended
	   levels: standard level 1, EDX bit 23 */
	if (max_std < 1)
		return 0;
	mm_support_cpuid(1, &eax, &ebx, &ecx, &edx);
	return (edx & 0x00800000u) ? 1 : 0;
#else
	return 0;
#endif
}
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
218
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	Function to test if mmx instructions are supported...

	Returns 1 if MMX instructions are supported, 0 otherwise.  This is
	bit 0 of the mm_support() result, which is set in each of its
	non-zero return codes (1, 3 and 5).

	Declared `static inline' rather than `inline extern' so that every
	translation unit gets a usable definition whether or not the
	compiler inlines the call.
*/
static inline int
mmx_ok(void)
{
	return ( mm_support() & 0x1 );
}
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
227
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
228
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
229 /* Helper functions for the instruction macros that follow...
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
230 (note that memory-to-register, m2r, instructions are nearly
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
231 as efficient as register-to-register, r2r, instructions;
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
232 however, memory-to-memory instructions are really simulated
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
233 as a convenience, and are only 1/3 as efficient)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
234 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
235 #ifdef MMX_TRACE
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
236
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
237 /* Include the stuff for printing a trace to stderr...
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
238 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
239
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
240 #include <stdio.h>
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
241
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	Tracing immediate-to-register operation: prints the immediate and
	the register contents before the instruction, executes
	"op $imm, %reg", then prints the register contents afterwards.
	Trace text goes to stderr.  The stringified arguments are passed
	through %s so that a `%' in an argument cannot corrupt the format
	string.  The do/while(0) wrapper makes the macro a single statement
	that takes a trailing semicolon like the non-tracing version.
*/
#define	mmx_i2r(op, imm, reg) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace.uq = (imm); \
		fprintf(stderr, "%s_i2r(%s=0x%08x%08x, ", #op, #imm, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s=0x%08x%08x) => ", #reg, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "i" (imm)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s=0x%08x%08x\n", #reg, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
262
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	Tracing memory-to-register operation: prints the memory operand
	(an mmx_t) and the register contents before the instruction,
	executes "op mem, %reg", then prints the register afterwards.
	Trace text goes to stderr.  The memory operand uses the "m"
	constraint, as the non-tracing version does.  The stringified
	arguments are passed through %s so that a `%' in an argument
	cannot corrupt the format string.
*/
#define	mmx_m2r(op, mem, reg) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace = (mem); \
		fprintf(stderr, "%s_m2r(%s=0x%08x%08x, ", #op, #mem, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s=0x%08x%08x) => ", #reg, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "m" (mem)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s=0x%08x%08x\n", #reg, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
283
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	Tracing register-to-memory operation: prints the register and the
	memory operand (an mmx_t) before the instruction, executes
	"op %reg, mem", then prints the memory operand afterwards.
	Trace text goes to stderr.  The stringified arguments are passed
	through %s so that a `%' in an argument cannot corrupt the format
	string.
*/
#define	mmx_r2m(op, reg, mem) \
	do { \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s_r2m(%s=0x%08x%08x, ", #op, #reg, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		mmx_trace = (mem); \
		fprintf(stderr, "%s=0x%08x%08x) => ", #mem, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %%" #reg ", %0" \
				      : "=m" (mem) \
				      : /* nothing */ ); \
		mmx_trace = (mem); \
		fprintf(stderr, "%s=0x%08x%08x\n", #mem, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
302
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	Tracing register-to-register operation: prints the source and
	destination registers before the instruction, executes
	"op %regs, %regd", then prints the destination afterwards.
	Trace text goes to stderr.  The instruction itself is emitted as
	an asm with no operands, so a single `%' introduces each register
	name there.
*/
#define	mmx_r2r(op, regs, regd) \
	do { \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #regs ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s_r2r(%s=0x%08x%08x, ", #op, #regs, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s=0x%08x%08x) => ", #regd, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=m" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, "%s=0x%08x%08x\n", #regd, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
323
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/*	Tracing memory-to-memory operation: prints both memory operands
	(mmx_t values) before the operation, performs it through %mm0
	(load memd, apply op with mems, store back to memd), then prints
	memd afterwards.  Trace text goes to stderr.

	memd is read as well as written by the asm, so it is declared
	"+m"; with "=m" the compiler would be free to treat its previous
	contents as dead.  %mm0 is overwritten.
*/
#define	mmx_m2m(op, mems, memd) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace = (mems); \
		fprintf(stderr, "%s_m2m(%s=0x%08x%08x, ", #op, #mems, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		mmx_trace = (memd); \
		fprintf(stderr, "%s=0x%08x%08x) => ", #memd, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
				      #op " %1, %%mm0\n\t" \
				      "movq %%mm0, %0" \
				      : "+m" (memd) \
				      : "m" (mems)); \
		mmx_trace = (memd); \
		fprintf(stderr, "%s=0x%08x%08x\n", #memd, \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
342
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
343 #else
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
344
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
345 /* These macros are a lot simpler without the tracing...
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
346 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
347
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* Immediate-to-register: applies `insn' to MMX register `mmreg' with the
   compile-time constant `value' as the source operand. */
#define	mmx_i2r(insn, value, mmreg) \
	__asm__ __volatile__ (#insn " %0, %%" #mmreg \
			      : /* no outputs */ \
			      : "i" (value))
2509
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
352
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* Memory-to-register: applies `insn' to MMX register `mmreg' with the
   object `src' as a memory source operand. */
#define	mmx_m2r(insn, src, mmreg) \
	__asm__ __volatile__ (#insn " %0, %%" #mmreg \
			      : /* no outputs */ \
			      : "m" (src))
2509
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
357
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* Register-to-memory: applies `insn' with MMX register `mmreg' as the
   source and the object `dst' as a write-only memory destination. */
#define	mmx_r2m(insn, mmreg, dst) \
	__asm__ __volatile__ (#insn " %%" #mmreg ", %0" \
			      : "=m" (dst) \
			      : /* no inputs */ )
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
362
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
363 #define mmx_r2r(op, regs, regd) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
364 __asm__ __volatile__ (#op " %" #regs ", %" #regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
365
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* Memory-to-memory form: memd = memd <op> mems, computed through %mm0.
   memd is loaded by the first instruction and stored by the last, so it
   is a read-write operand and must use the "+m" constraint ("=m" would
   tell the compiler its previous contents are dead).
   %mm0 is overwritten and is not listed as a clobber, so callers must
   not keep a live value in it across this macro. */
366 #define mmx_m2m(op, mems, memd) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
367 __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
368 #op " %1, %%mm0\n\t" \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
369 "movq %%mm0, %0" \
15808
f03a8d54e5f9 fix asm constraints, tested on x86 and x86_64.
reimar
parents: 15617
diff changeset
370 : "+m" (memd) \
f03a8d54e5f9 fix asm constraints, tested on x86 and x86_64.
reimar
parents: 15617
diff changeset
371 : "m" (mems))
2509
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
372
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
373 #endif
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
374
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
375
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
376 /* 1x64 MOVe Quadword
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
377 (this is both a load and a store...
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
378 in fact, it is the only way to store a full 64 bits)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
379 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
380 #define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
381 #define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
382 #define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* Memory-to-memory 64-bit copy: loads vars into %mm0, then stores %mm0
   to vard.  vard is only written ("=m"), vars is only read ("m").
   %mm0 is overwritten and is not listed as a clobber, so callers must
   not keep a live value in it across this macro. */
383 #define movq(vars, vard) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
384 __asm__ __volatile__ ("movq %1, %%mm0\n\t" \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
385 "movq %%mm0, %0" \
15808
f03a8d54e5f9 fix asm constraints, tested on x86 and x86_64.
reimar
parents: 15617
diff changeset
386 : "=m" (vard) \
f03a8d54e5f9 fix asm constraints, tested on x86 and x86_64.
reimar
parents: 15617
diff changeset
387 : "m" (vars))
2509
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
388
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
389
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
390 /* 1x32 MOVe Doubleword
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
391 (like movq, this is both load and store...
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
392 but is most useful for moving things between
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
393 mmx registers and ordinary registers)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
394 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
395 #define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
396 #define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
397 #define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* Memory-to-memory doubleword copy: loads vars into %mm0 with movd,
   then stores %mm0 to vard with movd.  vard is only written ("=m"),
   vars is only read ("m").
   %mm0 is overwritten and is not listed as a clobber, so callers must
   not keep a live value in it across this macro. */
398 #define movd(vars, vard) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
399 __asm__ __volatile__ ("movd %1, %%mm0\n\t" \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
400 "movd %%mm0, %0" \
15808
f03a8d54e5f9 fix asm constraints, tested on x86 and x86_64.
reimar
parents: 15617
diff changeset
401 : "=m" (vard) \
f03a8d54e5f9 fix asm constraints, tested on x86 and x86_64.
reimar
parents: 15617
diff changeset
402 : "m" (vars))
2509
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
403
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
404
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
405 /* 2x32, 4x16, and 8x8 Parallel ADDs
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
406 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
407 #define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
408 #define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
409 #define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
410
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
411 #define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
412 #define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
413 #define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
414
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
415 #define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
416 #define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
417 #define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
418
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
419
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
420 /* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
421 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
422 #define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
423 #define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
424 #define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
425
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
426 #define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
427 #define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
428 #define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
429
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
430
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
431 /* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
432 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
433 #define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
434 #define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
435 #define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
436
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
437 #define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
438 #define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
439 #define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
440
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
441
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
442 /* 2x32, 4x16, and 8x8 Parallel SUBs
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
443 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
444 #define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
445 #define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
446 #define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
447
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
448 #define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
449 #define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
450 #define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
451
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
452 #define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
453 #define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
454 #define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
455
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
456
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
457 /* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
458 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
459 #define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
460 #define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
461 #define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
462
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
463 #define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
464 #define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
465 #define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
466
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
467
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
468 /* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
469 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
470 #define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
471 #define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
472 #define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
473
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
474 #define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
475 #define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
476 #define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
477
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
478
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
479 /* 4x16 Parallel MULs giving Low 4x16 portions of results
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
480 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
481 #define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
482 #define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
483 #define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
484
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
485
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
486 /* 4x16 Parallel MULs giving High 4x16 portions of results
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
487 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
488 #define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
489 #define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
490 #define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
491
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
492
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
493 /* 4x16->2x32 Parallel Mul-ADD
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
494 (muls like pmullw, then adds adjacent 16-bit fields
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
495 in the multiply result to make the final 2x32 result)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
496 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
497 #define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
498 #define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
499 #define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
500
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
501
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
502 /* 1x64 bitwise AND
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
503 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
504 #ifdef BROKEN_PAND
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
505 #define pand_m2r(var, reg) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
506 { \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
507 mmx_m2r(pandn, (mmx_t) -1LL, reg); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
508 mmx_m2r(pandn, var, reg); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
509 }
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
/* BROKEN_PAND workaround: regd = regd & regs, built from two pandn steps
   (pandn computes dest = ~dest & src).  The first pandn against all-ones
   leaves ~regd in regd; the second yields ~(~regd) & regs.
   The semicolon after mmx_r2r() is required: without MMX_TRACE that macro
   expands to a bare __asm__ statement with no terminator of its own. */
510 #define pand_r2r(regs, regd) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
511 { \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
512 mmx_m2r(pandn, (mmx_t) -1LL, regd); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
513 mmx_r2r(pandn, regs, regd); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
514 }
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
515 #define pand(vars, vard) \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
516 { \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
517 movq_m2r(vard, mm0); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
518 mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
519 mmx_m2r(pandn, vars, mm0); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
520 movq_r2m(mm0, vard); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
521 }
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
522 #else
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
523 #define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
524 #define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
525 #define pand(vars, vard) mmx_m2m(pand, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
526 #endif
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
527
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
528
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
529 /* 1x64 bitwise AND with Not the destination
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
530 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
531 #define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
532 #define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
533 #define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
534
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
535
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
536 /* 1x64 bitwise OR
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
537 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
538 #define por_m2r(var, reg) mmx_m2r(por, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
539 #define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
540 #define por(vars, vard) mmx_m2m(por, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
541
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
542
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
543 /* 1x64 bitwise eXclusive OR
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
544 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
545 #define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
546 #define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
547 #define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
548
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
549
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
550 /* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
551 (resulting fields are either 0 or -1)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
552 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
553 #define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
554 #define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
555 #define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
556
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
557 #define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
558 #define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
559 #define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
560
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
561 #define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
562 #define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
563 #define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
564
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
565
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
566 /* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
567 (resulting fields are either 0 or -1)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
568 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
569 #define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
570 #define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
571 #define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
572
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
573 #define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
574 #define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
575 #define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
576
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
577 #define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
578 #define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
579 #define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
580
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
581
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
582 /* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
583 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
584 #define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
585 #define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
586 #define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
587 #define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
588
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
589 #define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
590 #define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
591 #define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
592 #define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
593
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
594 #define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
595 #define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
596 #define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
597 #define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
598
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
599
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
600 /* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
601 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
602 #define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
603 #define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
604 #define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
605 #define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
606
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
607 #define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
608 #define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
609 #define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
610 #define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
611
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
612 #define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
613 #define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
614 #define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
615 #define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
616
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
617
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
618 /* 2x32 and 4x16 Parallel Shift Right Arithmetic
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
619 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
620 #define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
621 #define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
622 #define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
623 #define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
624
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
625 #define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
626 #define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
627 #define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
628 #define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
629
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
630
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
631 /* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
632 (packs source and dest fields into dest in that order)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
633 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
634 #define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
635 #define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
636 #define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
637
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
638 #define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
639 #define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
640 #define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
641
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
642
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
643 /* 4x16->8x8 PACK and Unsigned Saturate
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
644 (packs source and dest fields into dest in that order)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
645 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
646 #define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
647 #define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
648 #define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
649
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
650
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
651 /* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
652 (interleaves low half of dest with low half of source
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
653 as padding in each result field)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
654 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
655 #define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
656 #define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
657 #define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
658
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
659 #define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
660 #define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
661 #define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
662
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
663 #define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
664 #define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
665 #define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
666
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
667
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
668 /* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
669 (interleaves high half of dest with high half of source
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
670 as padding in each result field)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
671 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
672 #define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
673 #define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
674 #define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
675
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
676 #define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
677 #define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
678 #define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
679
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
680 #define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
681 #define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
682 #define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
683
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
684
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
685 /* Empty MMx State
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
686 (used to clean-up when going from mmx to float use
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
687 of the registers that are shared by both; note that
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
688 there is no float-to-mmx operation needed, because
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
689 only the float tag word info is corruptible)
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
690 */
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
691 #ifdef MMX_TRACE
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
692
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
693 #define emms() \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
694 { \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
695 printf("emms()\n"); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
696 __asm__ __volatile__ ("emms"); \
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
697 }
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
698
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
699 #else
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
700
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
701 #define emms() __asm__ __volatile__ ("emms")
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
702
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
703 #endif
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
704
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
705 #endif
22ee3be96a7f forgot, sorry
nick
parents:
diff changeset
706