Mercurial > libavcodec.hg
annotate i386/dsputil_mmx.c @ 550:b746a7d75ce6 libavcodec
Force inlining on get_vlc2.
author | mellum |
---|---|
date | Sat, 13 Jul 2002 19:31:15 +0000 |
parents | d7f65ea52aaa |
children | c9b17c1a02e0 |
rev | line source |
---|---|
0 | 1 /* |
2 * MMX optimized DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
0 | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
20 */ | |
21 | |
22 #include "../dsputil.h" | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
23 #include "../simple_idct.h" |
0 | 24 |
5 | 25 int mm_flags; /* multimedia extension flags */ |
26 | |
294 | 27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
30 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
31 | |
32 int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
33 int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
34 int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
35 int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
36 | |
37 int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
38 int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
39 int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
40 int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
41 | |
42 int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
43 int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
44 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
45 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
46 | |
42 | 47 /* external functions, from idct_mmx.c */ |
48 void ff_mmx_idct(DCTELEM *block); | |
49 void ff_mmxext_idct(DCTELEM *block); | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
50 |
0 | 51 /* pixel operations */ |
387 | 52 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; |
53 static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | |
54 static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL; | |
0 | 55 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
56 #define JUMPALIGN() __asm __volatile (".balign 8"::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
57 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
58 |
/* MOVQ_WONE(regd): load 0x0001000100010001 into MMX register regd without a memory access. pcmpeqd of a register with itself sets all 64 bits; psrlw $15 then leaves the value 1 in each 16-bit word. */
448 | 59 #define MOVQ_WONE(regd) \ |
60 __asm __volatile ( \ | |
61 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | |
62 "psrlw $15, %%" #regd ::) | |
63 | |
/* MOVQ_BFE(regd): fill MMX register regd with 0xfefefefefefefefe. pcmpeqd sets every byte to 0xff; paddb of the register with itself wraps each byte to 0xfe. This is the mask value the PAVGB_MMX* / PAVGBP_MMX* macros expect in regfe / mm6. */
64 #define MOVQ_BFE(regd) \ | |
65 __asm __volatile ( \ | |
66 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ | |
67 "paddb %%" #regd ", %%" #regd " \n\t" ::) | |
68 | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
69 #ifndef PIC |
448 | 70 #define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone)) |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
71 #define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
72 #else |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
73 // for shared library it's better to use this way for accessing constants |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
74 // pcmpeqd -> -1 |
448 | 75 #define MOVQ_BONE(regd) \ |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
76 __asm __volatile ( \ |
448 | 77 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
78 "psrlw $15, %%" #regd " \n\t" \ | |
79 "packuswb %%" #regd ", %%" #regd " \n\t" ::) | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
80 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
81 #define MOVQ_WTWO(regd) \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
82 __asm __volatile ( \ |
448 | 83 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
84 "psrlw $15, %%" #regd " \n\t" \ | |
85 "psllw $1, %%" #regd " \n\t"::) | |
387 | 86 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
87 #endif |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
88 |
448 | 89 // using regr as temporary and for the output result |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
90 // first argument is unmodified and second is trashed |
471 | 91 // regfe is supposed to contain 0xfefefefefefefefe |
/*
 * PAVGB_MMX_NO_RND(rega, regb, regr, regfe): byte-wise average of rega and
 * regb, rounded down, emitted as a fragment of asm text (no asm wrapper).
 *
 * Computes regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1), which for
 * each unsigned byte equals (a + b) >> 1 without needing a ninth bit.
 * Masking with regfe (0xfe in every byte) clears the low bit of each byte
 * first, so the 64-bit psrlq cannot shift a bit across a byte boundary.
 *
 * rega is preserved, regb is overwritten, the result is left in regr.
 */
92 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \ | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
93 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
94 "pand " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
95 "pxor " #rega ", " #regb " \n\t"\ |
471 | 96 "pand " #regfe "," #regb " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
97 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
98 "paddb " #regb ", " #regr " \n\t" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
99 |
/*
 * PAVGB_MMX(rega, regb, regr, regfe): byte-wise average of rega and regb,
 * rounded up, emitted as a fragment of asm text (no asm wrapper).
 *
 * Computes regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1), which for
 * each unsigned byte equals (a + b + 1) >> 1 without needing a ninth bit.
 * Masking with regfe (0xfe in every byte) clears the low bit of each byte
 * first, so the 64-bit psrlq cannot shift a bit across a byte boundary.
 *
 * rega is preserved, regb is overwritten, the result is left in regr.
 */
471 | 100 #define PAVGB_MMX(rega, regb, regr, regfe) \ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
101 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
102 "por " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
103 "pxor " #rega ", " #regb " \n\t"\ |
471 | 104 "pand " #regfe "," #regb " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
105 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
106 "psubb " #regb ", " #regr " \n\t" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
107 |
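471 | 108 // mm6 is supposed to contain 0xfefefefefefefefe (the value MOVQ_BFE builds); the PAVGBP_* macros below use mm6 directly as the mask and average two register pairs at once: (rega, regb) into regr and (regc, regd) into regp |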
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
109 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
110 "movq " #rega ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
111 "movq " #regc ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
112 "pand " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
113 "pand " #regd ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
114 "pxor " #rega ", " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
115 "pxor " #regc ", " #regd " \n\t"\ |
448 | 116 "pand %%mm6, " #regb " \n\t"\ |
117 "pand %%mm6, " #regd " \n\t"\ | |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
118 "psrlq $1, " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
119 "psrlq $1, " #regd " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
120 "paddb " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
121 "paddb " #regd ", " #regp " \n\t" |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
122 |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
123 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
124 "movq " #rega ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
125 "movq " #regc ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
126 "por " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
127 "por " #regd ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
128 "pxor " #rega ", " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
129 "pxor " #regc ", " #regd " \n\t"\ |
448 | 130 "pand %%mm6, " #regb " \n\t"\ |
131 "pand %%mm6, " #regd " \n\t"\ | |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
132 "psrlq $1, " #regd " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
133 "psrlq $1, " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
134 "psubb " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
135 "psubb " #regd ", " #regp " \n\t" |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
136 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
137 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
138 /* MMX no rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
139 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx |
448 | 140 #define SET_RND MOVQ_WONE |
141 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) | |
471 | 142 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
143 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
144 #include "dsputil_mmx_rnd.h" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
145 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
146 #undef DEF |
448 | 147 #undef SET_RND |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
148 #undef PAVGBP |
471 | 149 #undef PAVGB |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
150 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
151 /* MMX rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
152 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
153 #define DEF(x, y) x ## _ ## y ##_mmx |
448 | 154 #define SET_RND MOVQ_WTWO |
155 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) | |
471 | 156 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
157 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
158 #include "dsputil_mmx_rnd.h" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
159 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
160 #undef DEF |
448 | 161 #undef SET_RND |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
162 #undef PAVGBP |
471 | 163 #undef PAVGB |
387 | 164 |
0 | 165 /***********************************/ |
166 /* 3Dnow specific */ | |
167 | |
168 #define DEF(x) x ## _3dnow | |
169 /* for Athlons PAVGUSB is prefered */ | |
170 #define PAVGB "pavgusb" | |
171 | |
172 #include "dsputil_mmx_avg.h" | |
173 | |
174 #undef DEF | |
175 #undef PAVGB | |
176 | |
177 /***********************************/ | |
178 /* MMX2 specific */ | |
179 | |
386 | 180 #define DEF(x) x ## _mmx2 |
0 | 181 |
182 /* Introduced only in MMX2 set */ | |
183 #define PAVGB "pavgb" | |
184 | |
185 #include "dsputil_mmx_avg.h" | |
186 | |
187 #undef DEF | |
188 #undef PAVGB | |
189 | |
190 /***********************************/ | |
191 /* standard MMX */ | |
192 | |
/*
 * get_pixels_mmx: widen an 8x8 block of unsigned bytes into coefficient storage.
 *
 * block     - destination; 128 bytes ending at block+64 are written
 *             (assumes DCTELEM is 16 bits wide, so that the 128 bytes start
 *             at block itself - TODO confirm against the DCTELEM typedef)
 * pixels    - source, 8 bytes are read from each of 8 rows
 * line_size - distance in bytes between consecutive source rows
 *
 * The loop handles two rows per iteration; eax counts from -128 up to 0 in
 * steps of 32 and doubles as the (negative) byte offset from block+64.
 *
 * NOTE(review): the asm stores through block but declares neither a memory
 * output nor a memory clobber, and the MMX registers it uses are not listed
 * as clobbers - confirm this is safe with the compilers in use.
 */
193 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) | |
194 { | |
386 | 195 asm volatile( |
196 "movl $-128, %%eax \n\t" | |
197 "pxor %%mm7, %%mm7 \n\t" | |
198 ".balign 16 \n\t" | |
199 "1: \n\t" | |
/* load row n and row n+1 (8 bytes each) */
200 "movq (%0), %%mm0 \n\t" | |
201 "movq (%0, %2), %%mm2 \n\t" | |
202 "movq %%mm0, %%mm1 \n\t" | |
203 "movq %%mm2, %%mm3 \n\t" | |
/* interleave with the zero register mm7: each byte becomes a 16-bit word */
204 "punpcklbw %%mm7, %%mm0 \n\t" | |
205 "punpckhbw %%mm7, %%mm1 \n\t" | |
206 "punpcklbw %%mm7, %%mm2 \n\t" | |
207 "punpckhbw %%mm7, %%mm3 \n\t" | |
/* store 32 bytes (two widened rows) at the current offset */
208 "movq %%mm0, (%1, %%eax)\n\t" | |
209 "movq %%mm1, 8(%1, %%eax)\n\t" | |
210 "movq %%mm2, 16(%1, %%eax)\n\t" | |
211 "movq %%mm3, 24(%1, %%eax)\n\t" | |
/* advance the source by two rows (operand 3 is line_size*2); loop while eax is negative */
212 "addl %3, %0 \n\t" | |
213 "addl $32, %%eax \n\t" | |
214 "js 1b \n\t" | |
215 : "+r" (pixels) | |
216 : "r" (block+64), "r" (line_size), "r" (line_size*2) | |
217 : "%eax" | |
218 ); | |
0 | 219 } |
220 | |
/*
 * diff_pixels_mmx: store the per-pixel difference s1 - s2 of two 8x8 byte
 * blocks as 16-bit words.
 *
 * block  - destination; 128 bytes ending at block+64 are written
 *          (assumes DCTELEM is 16 bits wide, so that the 128 bytes start at
 *          block itself - TODO confirm against the DCTELEM typedef)
 * s1, s2 - sources, 8 bytes are read from each of 8 rows of both
 * stride - distance in bytes between consecutive rows, used for both sources
 *
 * One row is processed per iteration; eax counts from -128 up to 0 in steps
 * of 16 and doubles as the (negative) byte offset from block+64.
 *
 * NOTE(review): the asm stores through block but declares neither a memory
 * output nor a memory clobber - confirm this is safe with the compilers in use.
 */
324 | 221 static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) |
222 { | |
223 asm volatile( | |
386 | 224 "pxor %%mm7, %%mm7 \n\t" |
225 "movl $-128, %%eax \n\t" | |
324 | 226 ".balign 16 \n\t" |
227 "1: \n\t" | |
228 "movq (%0), %%mm0 \n\t" | |
229 "movq (%1), %%mm2 \n\t" | |
230 "movq %%mm0, %%mm1 \n\t" | |
231 "movq %%mm2, %%mm3 \n\t" | |
/* interleave with the zero register mm7: each byte becomes a 16-bit word */
232 "punpcklbw %%mm7, %%mm0 \n\t" | |
233 "punpckhbw %%mm7, %%mm1 \n\t" | |
234 "punpcklbw %%mm7, %%mm2 \n\t" | |
235 "punpckhbw %%mm7, %%mm3 \n\t" | |
/* mm0/mm1 hold the s1 row, mm2/mm3 the s2 row: result is s1 - s2 */
236 "psubw %%mm2, %%mm0 \n\t" | |
237 "psubw %%mm3, %%mm1 \n\t" | |
238 "movq %%mm0, (%2, %%eax)\n\t" | |
239 "movq %%mm1, 8(%2, %%eax)\n\t" | |
240 "addl %3, %0 \n\t" | |
241 "addl %3, %1 \n\t" | |
242 "addl $16, %%eax \n\t" | |
243 "jnz 1b \n\t" | |
244 : "+r" (s1), "+r" (s2) | |
245 : "r" (block+64), "r" (stride) | |
246 : "%eax" | |
247 ); | |
248 } | |
249 | |
/*
 * put_pixels_clamped_mmx: convert 8 rows of 8 signed 16-bit values to
 * unsigned bytes with saturation (packuswb) and store them as pixels.
 *
 * block     - source values; 64 bytes are read by each of the two asm
 *             statements (assumes DCTELEM is 16 bits wide, which matches the
 *             p += 32 step between them - TODO confirm)
 * pixels    - destination, 8 bytes are written to each of 8 rows
 * line_size - distance in bytes between consecutive destination rows
 *
 * The work is split into two hand-unrolled asm statements of four rows each.
 * They differ only in how the source is addressed: a memory operand in the
 * first, a pointer held in a register in the second (see the comment between
 * them for the reason given by the original author).
 */
0 | 250 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
251 { | |
252 const DCTELEM *p; | |
253 UINT8 *pix; | |
254 | |
255 /* set up the source (values) and destination (pixels) cursors */ | |
256 p = block; | |
257 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
258 /* unrolled loop */ |
/* rows 0-3: load 64 bytes, pack word pairs to bytes, store at pix + 0, 1, 2 and 3 times line_size */
0 | 259 __asm __volatile( |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
260 "movq %3, %%mm0\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
261 "movq 8%3, %%mm1\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
262 "movq 16%3, %%mm2\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
263 "movq 24%3, %%mm3\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
264 "movq 32%3, %%mm4\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
265 "movq 40%3, %%mm5\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
266 "movq 48%3, %%mm6\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
267 "movq 56%3, %%mm7\n\t" |
0 | 268 "packuswb %%mm1, %%mm0\n\t" |
269 "packuswb %%mm3, %%mm2\n\t" | |
270 "packuswb %%mm5, %%mm4\n\t" | |
271 "packuswb %%mm7, %%mm6\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
272 "movq %%mm0, (%0)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
273 "movq %%mm2, (%0, %1)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
274 "movq %%mm4, (%0, %1, 2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
275 "movq %%mm6, (%0, %2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
276 ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p) |
0 | 277 :"memory"); |
278 pix += line_size*4; | |
279 p += 32; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
280 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
281 // if here would be an exact copy of the code above |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
282 // compiler would generate some very strange code |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
283 // thus using "r" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
/* rows 4-7: same sequence as above, with the source addressed through register operand 3 */
284 __asm __volatile( |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
285 "movq (%3), %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
286 "movq 8(%3), %%mm1\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
287 "movq 16(%3), %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
288 "movq 24(%3), %%mm3\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
289 "movq 32(%3), %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
290 "movq 40(%3), %%mm5\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
291 "movq 48(%3), %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
292 "movq 56(%3), %%mm7\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
293 "packuswb %%mm1, %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
294 "packuswb %%mm3, %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
295 "packuswb %%mm5, %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
296 "packuswb %%mm7, %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
297 "movq %%mm0, (%0)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
298 "movq %%mm2, (%0, %1)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
299 "movq %%mm4, (%0, %1, 2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
300 "movq %%mm6, (%0, %2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
301 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
302 :"memory"); |
0 | 303 } |
304 | |
/*
 * add_pixels_clamped_mmx: add 8 rows of 8 signed 16-bit values to the
 * existing pixels and write the sums back as unsigned bytes.
 *
 * block     - values to add; 32 bytes are read per loop iteration
 *             (assumes DCTELEM is 16 bits wide, which matches the p += 16
 *             step - TODO confirm)
 * pixels    - 8 bytes in each of 8 rows are read, updated and written back
 * line_size - distance in bytes between consecutive pixel rows
 *
 * Each of the 4 iterations handles two rows: the pixel bytes are widened to
 * words, added with signed saturation (paddsw) and packed back to bytes with
 * unsigned saturation (packuswb).
 *
 * mm7 is zeroed once by MOVQ_ZERO in a separate asm statement and is used as
 * the zero source for the unpack instructions in every iteration, so it has
 * to survive between the asm statements.
 */
305 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |
306 { | |
307 const DCTELEM *p; | |
308 UINT8 *pix; | |
309 int i; | |
310 | |
311 /* set up the source (values) and destination (pixels) cursors */ | |
312 p = block; | |
313 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
314 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
315 i = 4; |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
316 do { |
0 | 317 __asm __volatile( |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
318 "movq (%2), %%mm0\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
319 "movq 8(%2), %%mm1\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
320 "movq 16(%2), %%mm2\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
321 "movq 24(%2), %%mm3\n\t" |
/* operand 0 is the pixel row at pix, operand 1 the row at pix+line_size */
0 | 322 "movq %0, %%mm4\n\t" |
323 "movq %1, %%mm6\n\t" | |
324 "movq %%mm4, %%mm5\n\t" | |
325 "punpcklbw %%mm7, %%mm4\n\t" | |
326 "punpckhbw %%mm7, %%mm5\n\t" | |
327 "paddsw %%mm4, %%mm0\n\t" | |
328 "paddsw %%mm5, %%mm1\n\t" | |
329 "movq %%mm6, %%mm5\n\t" | |
330 "punpcklbw %%mm7, %%mm6\n\t" | |
331 "punpckhbw %%mm7, %%mm5\n\t" | |
332 "paddsw %%mm6, %%mm2\n\t" | |
333 "paddsw %%mm5, %%mm3\n\t" | |
334 "packuswb %%mm1, %%mm0\n\t" | |
335 "packuswb %%mm3, %%mm2\n\t" | |
336 "movq %%mm0, %0\n\t" | |
337 "movq %%mm2, %1\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
338 :"+m"(*pix), "+m"(*(pix+line_size)) |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
339 :"r"(p) |
0 | 340 :"memory"); |
341 pix += line_size*2; | |
342 p += 16; | |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
343 } while (--i); |
0 | 344 } |
345 | |
/*
 * put_pixels_mmx: copy an 8-byte-wide column of h rows from pixels to block.
 *
 * block     - destination, 8 bytes are written per row
 * pixels    - source, 8 bytes are read per row
 * line_size - distance in bytes between consecutive rows, used for both
 *             source and destination
 * h         - number of rows; the loop copies 4 rows per iteration and exits
 *             only when h reaches exactly 0 after subtracting 4, so h must be
 *             a positive multiple of 4
 *
 * eax holds 2*line_size and is used to step both pointers two rows at a time.
 */
346 static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
347 { | |
471 | 348 __asm __volatile( |
420 | 349 "lea (%3, %3), %%eax \n\t" |
422 | 350 ".balign 8 \n\t" |
420 | 351 "1: \n\t" |
352 "movq (%1), %%mm0 \n\t" | |
353 "movq (%1, %3), %%mm1 \n\t" | |
354 "movq %%mm0, (%2) \n\t" | |
355 "movq %%mm1, (%2, %3) \n\t" | |
356 "addl %%eax, %1 \n\t" | |
357 "addl %%eax, %2 \n\t" | |
358 "movq (%1), %%mm0 \n\t" | |
359 "movq (%1, %3), %%mm1 \n\t" | |
360 "movq %%mm0, (%2) \n\t" | |
361 "movq %%mm1, (%2, %3) \n\t" | |
362 "addl %%eax, %1 \n\t" | |
363 "addl %%eax, %2 \n\t" | |
364 "subl $4, %0 \n\t" | |
365 "jnz 1b \n\t" | |
366 : "+g"(h), "+r" (pixels), "+r" (block) | |
367 : "r"(line_size) | |
368 : "%eax", "memory" | |
369 ); | |
0 | 370 } |
371 | |
/*
 * clear_blocks_mmx: zero 128*6 = 768 bytes starting at blocks.
 *
 * blocks - start of the area to clear (presumably 6 blocks of 64 16-bit
 *          coefficients - TODO confirm against the DCTELEM typedef and callers)
 *
 * The asm is given the address of the end of the area; eax runs from -768 up
 * to 0 in steps of 32 and four 8-byte stores of the zeroed mm7 are issued per
 * iteration.
 *
 * NOTE(review): the pointer is cast to int to form the end address, which
 * assumes pointers fit in an int (true for the i386 target this file is
 * written for). The asm also writes memory without declaring a memory
 * clobber - confirm this is safe with the compilers in use.
 */
296 | 372 static void clear_blocks_mmx(DCTELEM *blocks) |
373 { | |
471 | 374 __asm __volatile( |
296 | 375 "pxor %%mm7, %%mm7 \n\t" |
376 "movl $-128*6, %%eax \n\t" | |
377 "1: \n\t" | |
378 "movq %%mm7, (%0, %%eax) \n\t" | |
379 "movq %%mm7, 8(%0, %%eax) \n\t" | |
380 "movq %%mm7, 16(%0, %%eax) \n\t" | |
381 "movq %%mm7, 24(%0, %%eax) \n\t" | |
382 "addl $32, %%eax \n\t" | |
383 " js 1b \n\t" | |
384 : : "r" (((int)blocks)+128*6) | |
385 : "%eax" | |
386 ); | |
387 } | |
388 | |
393 | 389 #if 0 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
390 static void just_return() { return; } |
393 | 391 #endif |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
392 |
0 | 393 void dsputil_init_mmx(void) |
394 { | |
395 mm_flags = mm_support(); | |
188 | 396 #if 1 |
397 printf("libavcodec: CPU flags:"); | |
0 | 398 if (mm_flags & MM_MMX) |
399 printf(" mmx"); | |
400 if (mm_flags & MM_MMXEXT) | |
401 printf(" mmxext"); | |
402 if (mm_flags & MM_3DNOW) | |
403 printf(" 3dnow"); | |
404 if (mm_flags & MM_SSE) | |
405 printf(" sse"); | |
406 if (mm_flags & MM_SSE2) | |
407 printf(" sse2"); | |
408 printf("\n"); | |
409 #endif | |
410 | |
411 if (mm_flags & MM_MMX) { | |
412 get_pixels = get_pixels_mmx; | |
324 | 413 diff_pixels = diff_pixels_mmx; |
0 | 414 put_pixels_clamped = put_pixels_clamped_mmx; |
415 add_pixels_clamped = add_pixels_clamped_mmx; | |
296 | 416 clear_blocks= clear_blocks_mmx; |
415 | 417 |
294 | 418 pix_abs16x16 = pix_abs16x16_mmx; |
419 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |
420 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |
0 | 421 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
294 | 422 pix_abs8x8 = pix_abs8x8_mmx; |
423 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |
424 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |
425 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |
0 | 426 av_fdct = fdct_mmx; |
427 | |
428 put_pixels_tab[0] = put_pixels_mmx; | |
429 put_pixels_tab[1] = put_pixels_x2_mmx; | |
430 put_pixels_tab[2] = put_pixels_y2_mmx; | |
431 put_pixels_tab[3] = put_pixels_xy2_mmx; | |
432 | |
433 put_no_rnd_pixels_tab[0] = put_pixels_mmx; | |
434 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
435 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
436 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; | |
415 | 437 |
0 | 438 avg_pixels_tab[0] = avg_pixels_mmx; |
439 avg_pixels_tab[1] = avg_pixels_x2_mmx; | |
440 avg_pixels_tab[2] = avg_pixels_y2_mmx; | |
441 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
442 | |
443 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; | |
444 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; | |
445 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; | |
446 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; | |
386 | 447 |
0 | 448 if (mm_flags & MM_MMXEXT) { |
294 | 449 pix_abs16x16 = pix_abs16x16_mmx2; |
450 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | |
451 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | |
452 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | |
415 | 453 |
294 | 454 pix_abs8x8 = pix_abs8x8_mmx2; |
455 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | |
456 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | |
457 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | |
386 | 458 |
459 put_pixels_tab[1] = put_pixels_x2_mmx2; | |
460 put_pixels_tab[2] = put_pixels_y2_mmx2; | |
461 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2; | |
462 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2; | |
415 | 463 |
386 | 464 avg_pixels_tab[0] = avg_pixels_mmx2; |
465 avg_pixels_tab[1] = avg_pixels_x2_mmx2; | |
466 avg_pixels_tab[2] = avg_pixels_y2_mmx2; | |
467 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; | |
0 | 468 } else if (mm_flags & MM_3DNOW) { |
469 put_pixels_tab[1] = put_pixels_x2_3dnow; | |
470 put_pixels_tab[2] = put_pixels_y2_3dnow; | |
386 | 471 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow; |
472 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow; | |
393 | 473 |
0 | 474 avg_pixels_tab[0] = avg_pixels_3dnow; |
475 avg_pixels_tab[1] = avg_pixels_x2_3dnow; | |
476 avg_pixels_tab[2] = avg_pixels_y2_3dnow; | |
477 avg_pixels_tab[3] = avg_pixels_xy2_3dnow; | |
478 } | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
479 |
42 | 480 /* idct */ |
481 if (mm_flags & MM_MMXEXT) { | |
482 ff_idct = ff_mmxext_idct; | |
483 } else { | |
484 ff_idct = ff_mmx_idct; | |
485 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
486 #ifdef SIMPLE_IDCT |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
487 // ff_idct = simple_idct; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
488 ff_idct = simple_idct_mmx; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
489 #endif |
0 | 490 } |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
491 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
492 #if 0 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
493 // for speed testing |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
494 get_pixels = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
495 put_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
496 add_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
497 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
498 pix_abs16x16 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
499 pix_abs16x16_x2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
500 pix_abs16x16_y2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
501 pix_abs16x16_xy2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
502 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
503 put_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
504 put_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
505 put_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
506 put_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
507 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
508 put_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
509 put_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
510 put_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
511 put_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
512 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
513 avg_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
514 avg_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
515 avg_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
516 avg_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
517 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
518 avg_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
519 avg_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
520 avg_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
521 avg_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
522 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
523 //av_fdct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
524 //ff_idct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
525 #endif |
0 | 526 } |
402 | 527 |
528 /* remove any non bit exact operation (testing purpose). NOTE that | |
529 this function should be kept as small as possible because it is | |
530 always difficult to test automatically non bit exact cases. */ | |
531 void dsputil_set_bit_exact_mmx(void) | |
532 { | |
533 if (mm_flags & MM_MMX) { | |
534 if (mm_flags & MM_MMXEXT) { | |
535 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
536 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
537 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
538 } else if (mm_flags & MM_3DNOW) { | |
539 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
540 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
541 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
542 } | |
543 } | |
544 } |