Mercurial > libavcodec.hg
annotate i386/dsputil_mmx.c @ 625:bb6a69f9d409 libavcodec
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
per context DCT selection
author | michaelni |
---|---|
date | Thu, 29 Aug 2002 23:55:32 +0000 |
parents | c9b17c1a02e0 |
children | f65040bf4357 |
rev | line source |
---|---|
0 | 1 /* |
2 * MMX optimized DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
0 | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
20 */ | |
21 | |
22 #include "../dsputil.h" | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
23 #include "../simple_idct.h" |
0 | 24 |
/* CPU capability bits (tested against the MM_* masks); assigned from
 * mm_support() in dsputil_init_mmx(). */
int mm_flags; /* multimedia extension flags */

/* Prototypes for the pix_abs* routines that dsputil_init_mmx() installs.
 * Their definitions are not in this part of the file.
 * The _mmx variants are installed whenever MM_MMX is set. */
int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);

/* The _mmx2 variants replace the _mmx ones when MM_MMXEXT is set. */
int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);

int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);

int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);

/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);

/* pixel operations */
/* 64-bit constants, 8-byte aligned so they can be loaded with a single movq:
 *   mm_bone: 0x01 in every byte
 *   mm_wone: 0x0001 in every 16-bit word
 *   mm_wtwo: 0x0002 in every 16-bit word */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
0 | 55 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
/* Emit a ".balign 8" directive at this point of the generated assembly. */
#define JUMPALIGN() __asm __volatile (".balign 8"::)
/* Clear MMX register regd by xor-ing it with itself. */
#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)

/* Load 0x0001 into every 16-bit word of regd without touching memory:
 * pcmpeqd of a register with itself sets all bits, then the logical right
 * shift of each word by 15 leaves only the lowest bit. */
#define MOVQ_WONE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

/* Load 0xfe into every byte of regd without touching memory:
 * all bits are set first, then each 0xff byte is added to itself with
 * wrap-around (0xff + 0xff = 0xfe modulo 256). */
#define MOVQ_BFE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
    "paddb %%" #regd ", %%" #regd " \n\t" ::)
68 | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
/* MOVQ_BONE(regd): load 0x01 into every byte of regd.
 * MOVQ_WTWO(regd): load 0x0002 into every 16-bit word of regd.
 * Non-PIC builds load the constants from memory; PIC builds synthesize
 * them in the register so no data symbol has to be addressed. */
#ifndef PIC
#define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone))
#define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
#else
// for a shared library it is better to build the constants this way
// than to access them in memory
// pcmpeqd -> -1 (all bits set)
// psrlw $15 -> 0x0001 in each word; packuswb -> 0x01 in each byte
#define MOVQ_BONE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd " \n\t" \
    "packuswb %%" #regd ", %%" #regd " \n\t" ::)

// psrlw $15 -> 0x0001 in each word; psllw $1 -> 0x0002 in each word
#define MOVQ_WTWO(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd " \n\t" \
    "psllw $1, %%" #regd " \n\t"::)

#endif
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
88 |
// Byte-wise average of two MMX registers, as an asm template fragment.
// regr is used as temporary and receives the result;
// the first argument (rega) is unmodified and the second (regb) is trashed.
// regfe is supposed to contain 0xfefefefefefefefe: it clears the low bit of
// every byte so the 64-bit shift cannot carry a bit into the neighbouring byte.
// Per byte: regr = (a & b) + (((a ^ b) & 0xfe) >> 1), the average rounded down.
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr " \n\t"\
    "pand " #regb ", " #regr " \n\t"\
    "pxor " #rega ", " #regb " \n\t"\
    "pand " #regfe "," #regb " \n\t"\
    "psrlq $1, " #regb " \n\t"\
    "paddb " #regb ", " #regr " \n\t"

// Same operand conventions as PAVGB_MMX_NO_RND.
// Per byte: regr = (a | b) - (((a ^ b) & 0xfe) >> 1), the average rounded up.
#define PAVGB_MMX(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr " \n\t"\
    "por " #regb ", " #regr " \n\t"\
    "pxor " #rega ", " #regb " \n\t"\
    "pand " #regfe "," #regb " \n\t"\
    "psrlq $1, " #regb " \n\t"\
    "psubb " #regb ", " #regr " \n\t"
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
107 |
// Paired variants: two independent byte-wise averages interleaved in one
// template fragment: regr = avg(rega, regb) and regp = avg(regc, regd).
// rega and regc are left unmodified; regb and regd are trashed.
// mm6 is supposed to contain 0xfefefefefefefefe (hard-coded mask register)
// Result rounded down: (a & b) + (((a ^ b) & 0xfe) >> 1) per byte.
#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
    "movq " #rega ", " #regr " \n\t"\
    "movq " #regc ", " #regp " \n\t"\
    "pand " #regb ", " #regr " \n\t"\
    "pand " #regd ", " #regp " \n\t"\
    "pxor " #rega ", " #regb " \n\t"\
    "pxor " #regc ", " #regd " \n\t"\
    "pand %%mm6, " #regb " \n\t"\
    "pand %%mm6, " #regd " \n\t"\
    "psrlq $1, " #regb " \n\t"\
    "psrlq $1, " #regd " \n\t"\
    "paddb " #regb ", " #regr " \n\t"\
    "paddb " #regd ", " #regp " \n\t"

// Result rounded up: (a | b) - (((a ^ b) & 0xfe) >> 1) per byte.
// Also expects 0xfefefefefefefefe in mm6.
#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
    "movq " #rega ", " #regr " \n\t"\
    "movq " #regc ", " #regp " \n\t"\
    "por " #regb ", " #regr " \n\t"\
    "por " #regd ", " #regp " \n\t"\
    "pxor " #rega ", " #regb " \n\t"\
    "pxor " #regc ", " #regd " \n\t"\
    "pand %%mm6, " #regb " \n\t"\
    "pand %%mm6, " #regd " \n\t"\
    "psrlq $1, " #regd " \n\t"\
    "psrlq $1, " #regb " \n\t"\
    "psubb " #regb ", " #regr " \n\t"\
    "psubb " #regd ", " #regp " \n\t"
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
136 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
/*
 * The sections below include a template header several times, each time
 * with a different set of helper macros. DEF() builds the per-variant
 * function name; the remaining macros select the rounding constant and the
 * averaging primitive. The template headers themselves are not visible
 * here -- presumably they define the put/avg pixels_x2/_y2/_xy2 functions
 * that dsputil_init_mmx() installs; confirm against the headers.
 */
/***********************************/
/* MMX no rounding */
#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
#define SET_RND MOVQ_WONE
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)

#include "dsputil_mmx_rnd.h"

#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB
/***********************************/
/* MMX rounding */

#define DEF(x, y) x ## _ ## y ##_mmx
#define SET_RND MOVQ_WTWO
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)

#include "dsputil_mmx_rnd.h"

#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB

/***********************************/
/* 3Dnow specific */

#define DEF(x) x ## _3dnow
/* for Athlons PAVGUSB is preferred */
#define PAVGB "pavgusb"

#include "dsputil_mmx_avg.h"

#undef DEF
#undef PAVGB

/***********************************/
/* MMX2 specific */

#define DEF(x) x ## _mmx2

/* Introduced only in MMX2 set */
#define PAVGB "pavgb"

#include "dsputil_mmx_avg.h"

#undef DEF
#undef PAVGB
189 | |
190 /***********************************/ | |
191 /* standard MMX */ | |
192 | |
193 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) | |
194 { | |
386 | 195 asm volatile( |
196 "movl $-128, %%eax \n\t" | |
197 "pxor %%mm7, %%mm7 \n\t" | |
198 ".balign 16 \n\t" | |
199 "1: \n\t" | |
200 "movq (%0), %%mm0 \n\t" | |
201 "movq (%0, %2), %%mm2 \n\t" | |
202 "movq %%mm0, %%mm1 \n\t" | |
203 "movq %%mm2, %%mm3 \n\t" | |
204 "punpcklbw %%mm7, %%mm0 \n\t" | |
205 "punpckhbw %%mm7, %%mm1 \n\t" | |
206 "punpcklbw %%mm7, %%mm2 \n\t" | |
207 "punpckhbw %%mm7, %%mm3 \n\t" | |
208 "movq %%mm0, (%1, %%eax)\n\t" | |
209 "movq %%mm1, 8(%1, %%eax)\n\t" | |
210 "movq %%mm2, 16(%1, %%eax)\n\t" | |
211 "movq %%mm3, 24(%1, %%eax)\n\t" | |
212 "addl %3, %0 \n\t" | |
213 "addl $32, %%eax \n\t" | |
214 "js 1b \n\t" | |
215 : "+r" (pixels) | |
216 : "r" (block+64), "r" (line_size), "r" (line_size*2) | |
217 : "%eax" | |
218 ); | |
0 | 219 } |
220 | |
324 | 221 static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) |
222 { | |
223 asm volatile( | |
386 | 224 "pxor %%mm7, %%mm7 \n\t" |
225 "movl $-128, %%eax \n\t" | |
324 | 226 ".balign 16 \n\t" |
227 "1: \n\t" | |
228 "movq (%0), %%mm0 \n\t" | |
229 "movq (%1), %%mm2 \n\t" | |
230 "movq %%mm0, %%mm1 \n\t" | |
231 "movq %%mm2, %%mm3 \n\t" | |
232 "punpcklbw %%mm7, %%mm0 \n\t" | |
233 "punpckhbw %%mm7, %%mm1 \n\t" | |
234 "punpcklbw %%mm7, %%mm2 \n\t" | |
235 "punpckhbw %%mm7, %%mm3 \n\t" | |
236 "psubw %%mm2, %%mm0 \n\t" | |
237 "psubw %%mm3, %%mm1 \n\t" | |
238 "movq %%mm0, (%2, %%eax)\n\t" | |
239 "movq %%mm1, 8(%2, %%eax)\n\t" | |
240 "addl %3, %0 \n\t" | |
241 "addl %3, %1 \n\t" | |
242 "addl $16, %%eax \n\t" | |
243 "jnz 1b \n\t" | |
244 : "+r" (s1), "+r" (s2) | |
245 : "r" (block+64), "r" (stride) | |
246 : "%eax" | |
247 ); | |
248 } | |
249 | |
0 | 250 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
251 { | |
252 const DCTELEM *p; | |
253 UINT8 *pix; | |
254 | |
255 /* read the pixels */ | |
256 p = block; | |
257 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
258 /* unrolled loop */ |
0 | 259 __asm __volatile( |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
260 "movq %3, %%mm0\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
261 "movq 8%3, %%mm1\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
262 "movq 16%3, %%mm2\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
263 "movq 24%3, %%mm3\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
264 "movq 32%3, %%mm4\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
265 "movq 40%3, %%mm5\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
266 "movq 48%3, %%mm6\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
267 "movq 56%3, %%mm7\n\t" |
0 | 268 "packuswb %%mm1, %%mm0\n\t" |
269 "packuswb %%mm3, %%mm2\n\t" | |
270 "packuswb %%mm5, %%mm4\n\t" | |
271 "packuswb %%mm7, %%mm6\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
272 "movq %%mm0, (%0)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
273 "movq %%mm2, (%0, %1)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
274 "movq %%mm4, (%0, %1, 2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
275 "movq %%mm6, (%0, %2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
276 ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p) |
0 | 277 :"memory"); |
278 pix += line_size*4; | |
279 p += 32; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
280 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
281 // if here would be an exact copy of the code above |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
282 // compiler would generate some very strange code |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
283 // thus using "r" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
284 __asm __volatile( |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
285 "movq (%3), %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
286 "movq 8(%3), %%mm1\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
287 "movq 16(%3), %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
288 "movq 24(%3), %%mm3\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
289 "movq 32(%3), %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
290 "movq 40(%3), %%mm5\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
291 "movq 48(%3), %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
292 "movq 56(%3), %%mm7\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
293 "packuswb %%mm1, %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
294 "packuswb %%mm3, %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
295 "packuswb %%mm5, %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
296 "packuswb %%mm7, %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
297 "movq %%mm0, (%0)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
298 "movq %%mm2, (%0, %1)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
299 "movq %%mm4, (%0, %1, 2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
300 "movq %%mm6, (%0, %2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
301 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
302 :"memory"); |
0 | 303 } |
304 | |
305 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |
306 { | |
307 const DCTELEM *p; | |
308 UINT8 *pix; | |
309 int i; | |
310 | |
311 /* read the pixels */ | |
312 p = block; | |
313 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
314 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
315 i = 4; |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
316 do { |
0 | 317 __asm __volatile( |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
318 "movq (%2), %%mm0\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
319 "movq 8(%2), %%mm1\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
320 "movq 16(%2), %%mm2\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
321 "movq 24(%2), %%mm3\n\t" |
0 | 322 "movq %0, %%mm4\n\t" |
323 "movq %1, %%mm6\n\t" | |
324 "movq %%mm4, %%mm5\n\t" | |
325 "punpcklbw %%mm7, %%mm4\n\t" | |
326 "punpckhbw %%mm7, %%mm5\n\t" | |
327 "paddsw %%mm4, %%mm0\n\t" | |
328 "paddsw %%mm5, %%mm1\n\t" | |
329 "movq %%mm6, %%mm5\n\t" | |
330 "punpcklbw %%mm7, %%mm6\n\t" | |
331 "punpckhbw %%mm7, %%mm5\n\t" | |
332 "paddsw %%mm6, %%mm2\n\t" | |
333 "paddsw %%mm5, %%mm3\n\t" | |
334 "packuswb %%mm1, %%mm0\n\t" | |
335 "packuswb %%mm3, %%mm2\n\t" | |
336 "movq %%mm0, %0\n\t" | |
337 "movq %%mm2, %1\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
338 :"+m"(*pix), "+m"(*(pix+line_size)) |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
339 :"r"(p) |
0 | 340 :"memory"); |
341 pix += line_size*2; | |
342 p += 16; | |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
343 } while (--i); |
0 | 344 } |
345 | |
/**
 * Copy h rows of 8 bytes from pixels to block.
 *
 * @param block      destination, rows line_size bytes apart
 * @param pixels     source, rows line_size bytes apart
 * @param line_size  row stride in bytes, shared by source and destination
 * @param h          number of rows; four rows are copied per loop pass and
 *                   the loop only ends when the counter reaches exactly
 *                   zero, so h must be a positive multiple of 4
 */
static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
    __asm __volatile(
        "lea (%3, %3), %%eax \n\t"        /* eax = 2 * line_size */
        ".balign 8 \n\t"
        "1: \n\t"
        "movq (%1), %%mm0 \n\t"
        "movq (%1, %3), %%mm1 \n\t"
        "movq %%mm0, (%2) \n\t"
        "movq %%mm1, (%2, %3) \n\t"
        "addl %%eax, %1 \n\t"
        "addl %%eax, %2 \n\t"
        "movq (%1), %%mm0 \n\t"
        "movq (%1, %3), %%mm1 \n\t"
        "movq %%mm0, (%2) \n\t"
        "movq %%mm1, (%2, %3) \n\t"
        "addl %%eax, %1 \n\t"
        "addl %%eax, %2 \n\t"
        "subl $4, %0 \n\t"
        "jnz 1b \n\t"
        : "+g"(h), "+r" (pixels), "+r" (block)
        : "r"(line_size)
        : "%eax", "memory"
    );
}
371 | |
296 | 372 static void clear_blocks_mmx(DCTELEM *blocks) |
373 { | |
471 | 374 __asm __volatile( |
296 | 375 "pxor %%mm7, %%mm7 \n\t" |
376 "movl $-128*6, %%eax \n\t" | |
377 "1: \n\t" | |
378 "movq %%mm7, (%0, %%eax) \n\t" | |
379 "movq %%mm7, 8(%0, %%eax) \n\t" | |
380 "movq %%mm7, 16(%0, %%eax) \n\t" | |
381 "movq %%mm7, 24(%0, %%eax) \n\t" | |
382 "addl $32, %%eax \n\t" | |
383 " js 1b \n\t" | |
384 : : "r" (((int)blocks)+128*6) | |
385 : "%eax" | |
386 ); | |
387 } | |
388 | |
#if 0
/* No-op stub, compiled out. The disabled "for speed testing" section of
 * dsputil_init_mmx() assigns it to the dsp function pointers. */
static void just_return() { return; }
#endif
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
392 |
0 | 393 void dsputil_init_mmx(void) |
394 { | |
395 mm_flags = mm_support(); | |
188 | 396 #if 1 |
397 printf("libavcodec: CPU flags:"); | |
0 | 398 if (mm_flags & MM_MMX) |
399 printf(" mmx"); | |
400 if (mm_flags & MM_MMXEXT) | |
401 printf(" mmxext"); | |
402 if (mm_flags & MM_3DNOW) | |
403 printf(" 3dnow"); | |
404 if (mm_flags & MM_SSE) | |
405 printf(" sse"); | |
406 if (mm_flags & MM_SSE2) | |
407 printf(" sse2"); | |
408 printf("\n"); | |
409 #endif | |
410 | |
411 if (mm_flags & MM_MMX) { | |
412 get_pixels = get_pixels_mmx; | |
324 | 413 diff_pixels = diff_pixels_mmx; |
0 | 414 put_pixels_clamped = put_pixels_clamped_mmx; |
415 add_pixels_clamped = add_pixels_clamped_mmx; | |
296 | 416 clear_blocks= clear_blocks_mmx; |
415 | 417 |
294 | 418 pix_abs16x16 = pix_abs16x16_mmx; |
419 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |
420 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |
0 | 421 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
294 | 422 pix_abs8x8 = pix_abs8x8_mmx; |
423 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |
424 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |
425 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |
574 | 426 |
0 | 427 put_pixels_tab[0] = put_pixels_mmx; |
428 put_pixels_tab[1] = put_pixels_x2_mmx; | |
429 put_pixels_tab[2] = put_pixels_y2_mmx; | |
430 put_pixels_tab[3] = put_pixels_xy2_mmx; | |
431 | |
432 put_no_rnd_pixels_tab[0] = put_pixels_mmx; | |
433 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
434 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
435 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; | |
415 | 436 |
0 | 437 avg_pixels_tab[0] = avg_pixels_mmx; |
438 avg_pixels_tab[1] = avg_pixels_x2_mmx; | |
439 avg_pixels_tab[2] = avg_pixels_y2_mmx; | |
440 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
441 | |
442 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; | |
443 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; | |
444 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; | |
445 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; | |
386 | 446 |
0 | 447 if (mm_flags & MM_MMXEXT) { |
294 | 448 pix_abs16x16 = pix_abs16x16_mmx2; |
449 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | |
450 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | |
451 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | |
415 | 452 |
294 | 453 pix_abs8x8 = pix_abs8x8_mmx2; |
454 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | |
455 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | |
456 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | |
386 | 457 |
458 put_pixels_tab[1] = put_pixels_x2_mmx2; | |
459 put_pixels_tab[2] = put_pixels_y2_mmx2; | |
460 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2; | |
461 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2; | |
415 | 462 |
386 | 463 avg_pixels_tab[0] = avg_pixels_mmx2; |
464 avg_pixels_tab[1] = avg_pixels_x2_mmx2; | |
465 avg_pixels_tab[2] = avg_pixels_y2_mmx2; | |
466 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; | |
0 | 467 } else if (mm_flags & MM_3DNOW) { |
468 put_pixels_tab[1] = put_pixels_x2_3dnow; | |
469 put_pixels_tab[2] = put_pixels_y2_3dnow; | |
386 | 470 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow; |
471 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow; | |
393 | 472 |
0 | 473 avg_pixels_tab[0] = avg_pixels_3dnow; |
474 avg_pixels_tab[1] = avg_pixels_x2_3dnow; | |
475 avg_pixels_tab[2] = avg_pixels_y2_3dnow; | |
476 avg_pixels_tab[3] = avg_pixels_xy2_3dnow; | |
477 } | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
478 |
42 | 479 /* idct */ |
480 if (mm_flags & MM_MMXEXT) { | |
481 ff_idct = ff_mmxext_idct; | |
482 } else { | |
483 ff_idct = ff_mmx_idct; | |
484 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
485 #ifdef SIMPLE_IDCT |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
486 // ff_idct = simple_idct; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
487 ff_idct = simple_idct_mmx; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
488 #endif |
0 | 489 } |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
490 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
491 #if 0 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
492 // for speed testing |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
493 get_pixels = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
494 put_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
495 add_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
496 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
497 pix_abs16x16 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
498 pix_abs16x16_x2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
499 pix_abs16x16_y2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
500 pix_abs16x16_xy2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
501 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
502 put_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
503 put_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
504 put_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
505 put_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
506 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
507 put_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
508 put_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
509 put_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
510 put_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
511 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
512 avg_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
513 avg_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
514 avg_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
515 avg_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
516 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
517 avg_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
518 avg_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
519 avg_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
520 avg_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
521 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
522 //av_fdct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
523 //ff_idct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
524 #endif |
0 | 525 } |
402 | 526 |
527 /* remove any non bit exact operation (testing purpose). NOTE that | |
528 this function should be kept as small as possible because it is | |
529 always difficult to test automatically non bit exact cases. */ | |
530 void dsputil_set_bit_exact_mmx(void) | |
531 { | |
532 if (mm_flags & MM_MMX) { | |
533 if (mm_flags & MM_MMXEXT) { | |
534 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
535 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
536 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
574 | 537 |
538 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |
539 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |
540 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | |
541 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |
542 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |
543 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |
402 | 544 } else if (mm_flags & MM_3DNOW) { |
545 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
546 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
547 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
548 } | |
549 } | |
550 } |