Mercurial > libavcodec.hg
annotate i386/dsputil_mmx.c @ 445:62c01dbdc1e0 libavcodec
* code with the new PAVGB for MMX-only CPUs split into a separate file
and compiled in the same way as _avg.h
* PAVG_MMX macros also accept an output parameter
* implemented a faster put_pixels_xy2, but it has slightly lower precision.
There is no visible difference in the image quality, though - it can
easily be switched back if needed (#if 0 #endif) - please check
author | kabi |
---|---|
date | Wed, 29 May 2002 17:16:22 +0000 |
parents | a5edef76dac6 |
children | efe0c0d40577 |
rev | line source |
---|---|
0 | 1 /* |
2 * MMX optimized DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
0 | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
20 */ | |
21 | |
22 #include "../dsputil.h" | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
23 #include "../simple_idct.h" |
0 | 24 |
5 | 25 int mm_flags; /* multimedia extension flags */ |
26 | |
294 | 27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
30 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
31 | |
32 int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
33 int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
34 int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
35 int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
36 | |
37 int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
38 int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
39 int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
40 int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
41 | |
42 int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
43 int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
44 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
45 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
46 | |
42 | 47 /* external functions, from idct_mmx.c */ |
48 void ff_mmx_idct(DCTELEM *block); | |
49 void ff_mmxext_idct(DCTELEM *block); | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
50 |
0 | 51 /* pixel operations */ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
52 static const uint64_t mm_bfe __attribute__ ((aligned(8))) = 0xfefefefefefefefeULL; |
387 | 53 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; |
54 static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | |
55 static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL; | |
8 | 56 //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; |
57 //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; | |
0 | 58 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
59 #define JUMPALIGN() __asm __volatile (".balign 8"::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
60 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
61 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
62 #ifndef PIC |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
63 #define MOVQ_WONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wone)) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
64 #define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) |
387 | 65 #define MOVQ_BONE(regd) "movq "MANGLE(mm_bone)", "#regd" \n\t" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
66 #define MOVQ_BFE(regd) "movq "MANGLE(mm_bfe)", "#regd" \n\t" |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
67 #else |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
68 // for shared library it's better to use this way for accessing constants |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
69 // pcmpeqd -> -1 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
70 #define MOVQ_WONE(regd) \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
71 __asm __volatile ( \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
72 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
73 "psrlw $15, %%" #regd ::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
74 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
75 #define MOVQ_WTWO(regd) \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
76 __asm __volatile ( \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
77 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
78 "psrlw $15, %%" #regd " \n\t" \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
79 "psllw $1, %%" #regd ::) |
387 | 80 |
81 #define MOVQ_BONE(regd) \ | |
82 "pcmpeqd " #regd ", " #regd " \n\t" \ | |
83 "psrlw $15, " #regd " \n\t"\ | |
84 "packuswb " #regd ", " #regd " \n\t" | |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
85 |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
86 #define MOVQ_BFE(regd) \ |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
87 "pcmpeqd " #regd ", " #regd " \n\t"\ |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
88 "paddb " #regd ", " #regd " \n\t" |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
89 #endif |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
90 |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
91 // using mm6 as temporary and for the output result |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
92 // first argument is unmodified and second is trashed |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
93 // mm7 is supposed to contain 0xfefefefefefefefe |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
94 #define PAVGB_MMX_NO_RND(rega, regb, regr) \ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
95 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
96 "pand " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
97 "pxor " #rega ", " #regb " \n\t"\ |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
98 "pand %%mm7, " #regb " \n\t"\ |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
99 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
100 "paddb " #regb ", " #regr " \n\t" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
101 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
102 #define PAVGB_MMX(rega, regb, regr) \ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
103 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
104 "por " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
105 "pxor " #rega ", " #regb " \n\t"\ |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
106 "pand %%mm7, " #regb " \n\t"\ |
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
107 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
108 "psubb " #regb ", " #regr " \n\t" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
109 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
110 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
111 /* MMX no rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
112 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
113 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
114 #define PAVGB(a, b) PAVGB_MMX_NO_RND(a, b, %%mm6) |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
115 #define PAVGBR(a, b, c) PAVGB_MMX_NO_RND(a, b, c) |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
116 #include "dsputil_mmx_rnd.h" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
117 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
118 #undef DEF |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
119 #undef PAVGB |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
120 #undef PAVGBR |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
121 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
122 /* MMX rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
123 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
124 #define DEF(x, y) x ## _ ## y ##_mmx |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
125 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
126 #define PAVGB(a, b) PAVGB_MMX(a, b, %%mm6) |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
127 #define PAVGBR(a, b, c) PAVGB_MMX(a, b, c) |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
128 #include "dsputil_mmx_rnd.h" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
129 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
130 #undef DEF |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
131 #undef PAVGB |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
132 #undef PAVGBR |
387 | 133 |
0 | 134 /***********************************/ |
135 /* 3Dnow specific */ | |
136 | |
137 #define DEF(x) x ## _3dnow | |
138 /* for Athlons PAVGUSB is preferred */ | |
139 #define PAVGB "pavgusb" | |
140 | |
141 #include "dsputil_mmx_avg.h" | |
142 | |
143 #undef DEF | |
144 #undef PAVGB | |
145 | |
146 /***********************************/ | |
147 /* MMX2 specific */ | |
148 | |
386 | 149 #define DEF(x) x ## _mmx2 |
0 | 150 |
151 /* Introduced only in MMX2 set */ | |
152 #define PAVGB "pavgb" | |
153 | |
154 #include "dsputil_mmx_avg.h" | |
155 | |
156 #undef DEF | |
157 #undef PAVGB | |
158 | |
159 /***********************************/ | |
160 /* standard MMX */ | |
161 | |
/*
 * get_pixels_mmx: expand an 8x8 block of bytes into 16-bit words.
 *
 * block     - destination; the 128 bytes immediately below block+64 are
 *             written (presumably all 64 coefficients, assuming DCTELEM is
 *             16 bits wide - TODO confirm against dsputil.h).
 * pixels    - source; 8 bytes are read from each of 8 consecutive rows.
 * line_size - distance in bytes between two source rows.
 *
 * Each loop iteration handles two rows (pixels and pixels+line_size) and
 * stores 32 bytes.  %eax runs from -128 up to 0 in steps of 32 and serves
 * as a negative byte offset from block+64, so the body executes 4 times.
 *
 * NOTE(review): the asm stores through operand %1 but lists only "%eax"
 * as clobbered (no "memory" clobber and no output operand covering the
 * destination) - confirm this is safe with the compilers in use.
 */
162 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) | |
163 { | |
386 | 164 asm volatile( |
165 "movl $-128, %%eax \n\t" | |
/* mm7 = 0, used as the high half when widening bytes to words */
166 "pxor %%mm7, %%mm7 \n\t" | |
167 ".balign 16 \n\t" | |
168 "1: \n\t" | |
169 "movq (%0), %%mm0 \n\t" | |
170 "movq (%0, %2), %%mm2 \n\t" | |
171 "movq %%mm0, %%mm1 \n\t" | |
172 "movq %%mm2, %%mm3 \n\t" | |
/* zero-extend: low 4 bytes -> mm0/mm2, high 4 bytes -> mm1/mm3 */
173 "punpcklbw %%mm7, %%mm0 \n\t" | |
174 "punpckhbw %%mm7, %%mm1 \n\t" | |
175 "punpcklbw %%mm7, %%mm2 \n\t" | |
176 "punpckhbw %%mm7, %%mm3 \n\t" | |
177 "movq %%mm0, (%1, %%eax)\n\t" | |
178 "movq %%mm1, 8(%1, %%eax)\n\t" | |
179 "movq %%mm2, 16(%1, %%eax)\n\t" | |
180 "movq %%mm3, 24(%1, %%eax)\n\t" | |
/* %3 is line_size*2: advance the source pointer by two rows */
181 "addl %3, %0 \n\t" | |
182 "addl $32, %%eax \n\t" | |
/* loop while the offset is still negative */
183 "js 1b \n\t" | |
184 : "+r" (pixels) | |
185 : "r" (block+64), "r" (line_size), "r" (line_size*2) | |
186 : "%eax" | |
187 ); | |
0 | 188 } |
189 | |
/*
 * diff_pixels_mmx: compute the per-pixel difference s1 - s2 of two 8x8
 * byte blocks and store it as 16-bit words.
 *
 * block  - destination; the 128 bytes immediately below block+64 are
 *          written (presumably all 64 coefficients, assuming DCTELEM is
 *          16 bits wide - TODO confirm against dsputil.h).
 * s1, s2 - sources; 8 bytes are read from each of 8 rows of both.
 * stride - distance in bytes between two rows; the same value is used
 *          for s1 and s2.
 *
 * One row is processed per iteration (16 bytes stored).  %eax runs from
 * -128 up to 0 in steps of 16 and serves as a negative byte offset from
 * block+64, so the body executes 8 times.
 *
 * NOTE(review): the asm stores through operand %2 but lists only "%eax"
 * as clobbered (no "memory" clobber and no output operand covering the
 * destination) - confirm this is safe with the compilers in use.
 */
324 | 190 static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) |
191 { | |
192 asm volatile( | |
/* mm7 = 0, used as the high half when widening bytes to words */
386 | 193 "pxor %%mm7, %%mm7 \n\t" |
194 "movl $-128, %%eax \n\t" | |
324 | 195 ".balign 16 \n\t" |
196 "1: \n\t" | |
197 "movq (%0), %%mm0 \n\t" | |
198 "movq (%1), %%mm2 \n\t" | |
199 "movq %%mm0, %%mm1 \n\t" | |
200 "movq %%mm2, %%mm3 \n\t" | |
/* zero-extend s1 into mm0 (low) / mm1 (high), s2 into mm2 / mm3 */
201 "punpcklbw %%mm7, %%mm0 \n\t" | |
202 "punpckhbw %%mm7, %%mm1 \n\t" | |
203 "punpcklbw %%mm7, %%mm2 \n\t" | |
204 "punpckhbw %%mm7, %%mm3 \n\t" | |
/* word-wise s1 - s2 */
205 "psubw %%mm2, %%mm0 \n\t" | |
206 "psubw %%mm3, %%mm1 \n\t" | |
207 "movq %%mm0, (%2, %%eax)\n\t" | |
208 "movq %%mm1, 8(%2, %%eax)\n\t" | |
209 "addl %3, %0 \n\t" | |
210 "addl %3, %1 \n\t" | |
211 "addl $16, %%eax \n\t" | |
/* loop until the offset reaches zero */
212 "jnz 1b \n\t" | |
213 : "+r" (s1), "+r" (s2) | |
214 : "r" (block+64), "r" (stride) | |
215 : "%eax" | |
216 ); | |
217 } | |
218 | |
0 | 219 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
220 { | |
221 const DCTELEM *p; | |
222 UINT8 *pix; | |
223 | |
224 /* read the pixels */ | |
225 p = block; | |
226 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
227 /* unrolled loop */ |
0 | 228 __asm __volatile( |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
229 "movq %3, %%mm0\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
230 "movq 8%3, %%mm1\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
231 "movq 16%3, %%mm2\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
232 "movq 24%3, %%mm3\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
233 "movq 32%3, %%mm4\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
234 "movq 40%3, %%mm5\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
235 "movq 48%3, %%mm6\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
236 "movq 56%3, %%mm7\n\t" |
0 | 237 "packuswb %%mm1, %%mm0\n\t" |
238 "packuswb %%mm3, %%mm2\n\t" | |
239 "packuswb %%mm5, %%mm4\n\t" | |
240 "packuswb %%mm7, %%mm6\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
241 "movq %%mm0, (%0)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
242 "movq %%mm2, (%0, %1)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
243 "movq %%mm4, (%0, %1, 2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
244 "movq %%mm6, (%0, %2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
245 ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p) |
0 | 246 :"memory"); |
247 pix += line_size*4; | |
248 p += 32; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
249 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
250 // if here would be an exact copy of the code above |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
251 // compiler would generate some very strange code |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
252 // thus using "r" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
253 __asm __volatile( |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
254 "movq (%3), %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
255 "movq 8(%3), %%mm1\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
256 "movq 16(%3), %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
257 "movq 24(%3), %%mm3\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
258 "movq 32(%3), %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
259 "movq 40(%3), %%mm5\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
260 "movq 48(%3), %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
261 "movq 56(%3), %%mm7\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
262 "packuswb %%mm1, %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
263 "packuswb %%mm3, %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
264 "packuswb %%mm5, %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
265 "packuswb %%mm7, %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
266 "movq %%mm0, (%0)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
267 "movq %%mm2, (%0, %1)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
268 "movq %%mm4, (%0, %1, 2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
269 "movq %%mm6, (%0, %2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
270 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
271 :"memory"); |
0 | 272 } |
273 | |
274 static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) | |
275 { | |
276 const DCTELEM *p; | |
277 UINT8 *pix; | |
278 int i; | |
279 | |
280 /* read the pixels */ | |
281 p = block; | |
282 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
283 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
284 i = 4; |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
285 do { |
0 | 286 __asm __volatile( |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
287 "movq (%2), %%mm0\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
288 "movq 8(%2), %%mm1\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
289 "movq 16(%2), %%mm2\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
290 "movq 24(%2), %%mm3\n\t" |
0 | 291 "movq %0, %%mm4\n\t" |
292 "movq %1, %%mm6\n\t" | |
293 "movq %%mm4, %%mm5\n\t" | |
294 "punpcklbw %%mm7, %%mm4\n\t" | |
295 "punpckhbw %%mm7, %%mm5\n\t" | |
296 "paddsw %%mm4, %%mm0\n\t" | |
297 "paddsw %%mm5, %%mm1\n\t" | |
298 "movq %%mm6, %%mm5\n\t" | |
299 "punpcklbw %%mm7, %%mm6\n\t" | |
300 "punpckhbw %%mm7, %%mm5\n\t" | |
301 "paddsw %%mm6, %%mm2\n\t" | |
302 "paddsw %%mm5, %%mm3\n\t" | |
303 "packuswb %%mm1, %%mm0\n\t" | |
304 "packuswb %%mm3, %%mm2\n\t" | |
305 "movq %%mm0, %0\n\t" | |
306 "movq %%mm2, %1\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
307 :"+m"(*pix), "+m"(*(pix+line_size)) |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
308 :"r"(p) |
0 | 309 :"memory"); |
310 pix += line_size*2; | |
311 p += 16; | |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
312 } while (--i); |
0 | 313 } |
314 | |
/*
 * put_pixels_mmx: copy an 8-byte-wide column of h rows from pixels to
 * block.
 *
 * block     - destination; 8 bytes are written per row.
 * pixels    - source; 8 bytes are read per row.
 * line_size - distance in bytes between two rows; the same value is used
 *             for source and destination.
 * h         - number of rows.  The loop copies 4 rows per iteration and
 *             only exits when h, decremented by 4, reaches exactly zero,
 *             so h is presumably always a positive multiple of 4 -
 *             verify against callers.
 */
315 static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
316 { | |
420 | 317 asm volatile |
318 ( | |
/* eax = 2 * line_size, the per-pair row advance */
319 "lea (%3, %3), %%eax \n\t" | |
422 | 320 ".balign 8 \n\t" |
420 | 321 "1: \n\t" |
/* rows 0 and 1 of this group */
322 "movq (%1), %%mm0 \n\t" | |
323 "movq (%1, %3), %%mm1 \n\t" | |
324 "movq %%mm0, (%2) \n\t" | |
325 "movq %%mm1, (%2, %3) \n\t" | |
326 "addl %%eax, %1 \n\t" | |
327 "addl %%eax, %2 \n\t" | |
/* rows 2 and 3 of this group */
328 "movq (%1), %%mm0 \n\t" | |
329 "movq (%1, %3), %%mm1 \n\t" | |
330 "movq %%mm0, (%2) \n\t" | |
331 "movq %%mm1, (%2, %3) \n\t" | |
332 "addl %%eax, %1 \n\t" | |
333 "addl %%eax, %2 \n\t" | |
334 "subl $4, %0 \n\t" | |
335 "jnz 1b \n\t" | |
336 : "+g"(h), "+r" (pixels), "+r" (block) | |
337 : "r"(line_size) | |
338 : "%eax", "memory" | |
339 ); | |
0 | 340 } |
341 | |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
342 #if 0 |
0 | 343 static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
344 { | |
345 UINT8 *p; | |
346 const UINT8 *pix; | |
347 p = block; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
348 pix = pixels; // 1s |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
349 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
350 MOVQ_WTWO(mm6); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
351 JUMPALIGN(); |
0 | 352 do { |
353 __asm __volatile( | |
354 "movq %1, %%mm0\n\t" | |
355 "movq %2, %%mm1\n\t" | |
356 "movq 1%1, %%mm4\n\t" | |
357 "movq 1%2, %%mm5\n\t" | |
358 "movq %%mm0, %%mm2\n\t" | |
359 "movq %%mm1, %%mm3\n\t" | |
360 "punpcklbw %%mm7, %%mm0\n\t" | |
361 "punpcklbw %%mm7, %%mm1\n\t" | |
362 "punpckhbw %%mm7, %%mm2\n\t" | |
363 "punpckhbw %%mm7, %%mm3\n\t" | |
364 "paddusw %%mm1, %%mm0\n\t" | |
365 "paddusw %%mm3, %%mm2\n\t" | |
366 "movq %%mm4, %%mm1\n\t" | |
367 "movq %%mm5, %%mm3\n\t" | |
368 "punpcklbw %%mm7, %%mm4\n\t" | |
369 "punpcklbw %%mm7, %%mm5\n\t" | |
370 "punpckhbw %%mm7, %%mm1\n\t" | |
371 "punpckhbw %%mm7, %%mm3\n\t" | |
372 "paddusw %%mm5, %%mm4\n\t" | |
373 "paddusw %%mm3, %%mm1\n\t" | |
374 "paddusw %%mm6, %%mm4\n\t" | |
375 "paddusw %%mm6, %%mm1\n\t" | |
376 "paddusw %%mm4, %%mm0\n\t" | |
377 "paddusw %%mm1, %%mm2\n\t" | |
378 "psrlw $2, %%mm0\n\t" | |
379 "psrlw $2, %%mm2\n\t" | |
380 "packuswb %%mm2, %%mm0\n\t" | |
381 "movq %%mm0, %0\n\t" | |
382 :"=m"(*p) | |
383 :"m"(*pix), | |
384 "m"(*(pix+line_size)) | |
385 :"memory"); | |
386 pix += line_size; | |
387 p += line_size; | |
388 } while(--h); | |
389 } | |
390 | |
391 static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
392 { | |
393 UINT8 *p; | |
394 const UINT8 *pix; | |
395 p = block; | |
396 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
397 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
398 MOVQ_WONE(mm6); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
399 JUMPALIGN(); |
0 | 400 do { |
401 __asm __volatile( | |
402 "movq %1, %%mm0\n\t" | |
403 "movq %2, %%mm1\n\t" | |
404 "movq 1%1, %%mm4\n\t" | |
405 "movq 1%2, %%mm5\n\t" | |
406 "movq %%mm0, %%mm2\n\t" | |
407 "movq %%mm1, %%mm3\n\t" | |
408 "punpcklbw %%mm7, %%mm0\n\t" | |
409 "punpcklbw %%mm7, %%mm1\n\t" | |
410 "punpckhbw %%mm7, %%mm2\n\t" | |
411 "punpckhbw %%mm7, %%mm3\n\t" | |
412 "paddusw %%mm1, %%mm0\n\t" | |
413 "paddusw %%mm3, %%mm2\n\t" | |
414 "movq %%mm4, %%mm1\n\t" | |
415 "movq %%mm5, %%mm3\n\t" | |
416 "punpcklbw %%mm7, %%mm4\n\t" | |
417 "punpcklbw %%mm7, %%mm5\n\t" | |
418 "punpckhbw %%mm7, %%mm1\n\t" | |
419 "punpckhbw %%mm7, %%mm3\n\t" | |
420 "paddusw %%mm5, %%mm4\n\t" | |
421 "paddusw %%mm3, %%mm1\n\t" | |
422 "paddusw %%mm6, %%mm4\n\t" | |
423 "paddusw %%mm6, %%mm1\n\t" | |
424 "paddusw %%mm4, %%mm0\n\t" | |
425 "paddusw %%mm1, %%mm2\n\t" | |
426 "psrlw $2, %%mm0\n\t" | |
427 "psrlw $2, %%mm2\n\t" | |
428 "packuswb %%mm2, %%mm0\n\t" | |
429 "movq %%mm0, %0\n\t" | |
430 :"=m"(*p) | |
431 :"m"(*pix), | |
432 "m"(*(pix+line_size)) | |
433 :"memory"); | |
434 pix += line_size; | |
435 p += line_size; | |
436 } while(--h); | |
437 } | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
438 #endif |
/*
 * avg_pixels_mmx: average an 8-byte-wide column of h rows from pixels
 * into block, rounding up.
 *
 * For every byte: block[x] = (block[x] + pixels[x] + 1) >> 1.
 *
 * block     - destination, read and written in place (8 bytes per row).
 * pixels    - source (8 bytes per row).
 * line_size - distance in bytes between two rows; the same value is used
 *             for source and destination.
 * h         - number of rows; the do/while loop runs at least once, so
 *             h is presumably always >= 1 - verify against callers.
 *
 * mm7 (zero) and mm6 (the word constant 1) are loaded by separate asm
 * statements before the loop and are relied on to stay intact between
 * the asm statements of each iteration.
 */
0 | 439 static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
440 { | |
441 UINT8 *p; | |
442 const UINT8 *pix; | |
443 p = block; | |
444 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
445 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
446 MOVQ_WONE(mm6); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
447 JUMPALIGN(); |
0 | 448 do { |
449 __asm __volatile( | |
450 "movq %0, %%mm0\n\t" | |
451 "movq %1, %%mm1\n\t" | |
452 "movq %%mm0, %%mm2\n\t" | |
453 "movq %%mm1, %%mm3\n\t" | |
/* widen both rows to words: low halves in mm0/mm1, high in mm2/mm3 */
454 "punpcklbw %%mm7, %%mm0\n\t" | |
455 "punpcklbw %%mm7, %%mm1\n\t" | |
456 "punpckhbw %%mm7, %%mm2\n\t" | |
457 "punpckhbw %%mm7, %%mm3\n\t" | |
/* (block + pixels + 1) >> 1 */
458 "paddusw %%mm1, %%mm0\n\t" | |
459 "paddusw %%mm3, %%mm2\n\t" | |
460 "paddusw %%mm6, %%mm0\n\t" | |
461 "paddusw %%mm6, %%mm2\n\t" | |
462 "psrlw $1, %%mm0\n\t" | |
463 "psrlw $1, %%mm2\n\t" | |
/* pack the words back to 8 bytes and store them */
464 "packuswb %%mm2, %%mm0\n\t" | |
465 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
466 :"+m"(*p) |
0 | 467 :"m"(*pix) |
468 :"memory"); | |
469 pix += line_size; | |
470 p += line_size; | |
471 } | |
472 while (--h); | |
473 } | |
474 | |
/*
 * avg_pixels_x2_mmx: average block with the horizontally interpolated
 * source, 8 bytes per row for h rows.
 *
 * For every byte, with rounding up at both stages:
 *   t        = (pixels[x] + pixels[x+1] + 1) >> 1
 *   block[x] = (block[x] + t + 1) >> 1
 *
 * block     - destination, read and written in place (8 bytes per row).
 * pixels    - source; because of the load at offset 1, 9 bytes per row
 *             are read.
 * line_size - distance in bytes between two rows; the same value is used
 *             for source and destination.
 * h         - number of rows; the do/while loop runs at least once, so
 *             h is presumably always >= 1 - verify against callers.
 *
 * mm7 (zero) and mm6 (the word constant 1) are loaded by separate asm
 * statements before the loop and are relied on to stay intact between
 * the asm statements of each iteration.
 */
475 static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
476 { | |
477 UINT8 *p; | |
478 const UINT8 *pix; | |
479 p = block; | |
480 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
481 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
482 MOVQ_WONE(mm6); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
483 JUMPALIGN(); |
0 | 484 do { |
485 __asm __volatile( | |
486 "movq %1, %%mm1\n\t" | |
487 "movq %0, %%mm0\n\t" | |
/* "1%1" addresses the source operand plus one byte: pixels[x+1] */
488 "movq 1%1, %%mm4\n\t" | |
489 "movq %%mm0, %%mm2\n\t" | |
490 "movq %%mm1, %%mm3\n\t" | |
491 "movq %%mm4, %%mm5\n\t" | |
492 "punpcklbw %%mm7, %%mm1\n\t" | |
493 "punpckhbw %%mm7, %%mm3\n\t" | |
494 "punpcklbw %%mm7, %%mm4\n\t" | |
495 "punpckhbw %%mm7, %%mm5\n\t" | |
496 "punpcklbw %%mm7, %%mm0\n\t" | |
497 "punpckhbw %%mm7, %%mm2\n\t" | |
/* t = (pixels[x] + pixels[x+1] + 1) >> 1, in mm1 (low) / mm3 (high) */
498 "paddusw %%mm4, %%mm1\n\t" | |
499 "paddusw %%mm5, %%mm3\n\t" | |
500 "paddusw %%mm6, %%mm1\n\t" | |
501 "paddusw %%mm6, %%mm3\n\t" | |
502 "psrlw $1, %%mm1\n\t" | |
503 "psrlw $1, %%mm3\n\t" | |
/* (block + t + 1) >> 1, in mm0 (low) / mm2 (high) */
504 "paddusw %%mm6, %%mm0\n\t" | |
505 "paddusw %%mm6, %%mm2\n\t" | |
506 "paddusw %%mm1, %%mm0\n\t" | |
507 "paddusw %%mm3, %%mm2\n\t" | |
508 "psrlw $1, %%mm0\n\t" | |
509 "psrlw $1, %%mm2\n\t" | |
510 "packuswb %%mm2, %%mm0\n\t" | |
511 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
512 :"+m"(*p) |
0 | 513 :"m"(*pix) |
514 :"memory"); | |
515 pix += line_size; | |
516 p += line_size; | |
517 } while (--h); | |
518 } | |
519 | |
520 static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
521 { | |
522 UINT8 *p; | |
523 const UINT8 *pix; | |
524 p = block; | |
525 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
526 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
527 MOVQ_WONE(mm6); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
528 JUMPALIGN(); |
0 | 529 do { |
530 __asm __volatile( | |
531 "movq %1, %%mm1\n\t" | |
532 "movq %0, %%mm0\n\t" | |
533 "movq %2, %%mm4\n\t" | |
534 "movq %%mm0, %%mm2\n\t" | |
535 "movq %%mm1, %%mm3\n\t" | |
536 "movq %%mm4, %%mm5\n\t" | |
537 "punpcklbw %%mm7, %%mm1\n\t" | |
538 "punpckhbw %%mm7, %%mm3\n\t" | |
539 "punpcklbw %%mm7, %%mm4\n\t" | |
540 "punpckhbw %%mm7, %%mm5\n\t" | |
541 "punpcklbw %%mm7, %%mm0\n\t" | |
542 "punpckhbw %%mm7, %%mm2\n\t" | |
543 "paddusw %%mm4, %%mm1\n\t" | |
544 "paddusw %%mm5, %%mm3\n\t" | |
545 "paddusw %%mm6, %%mm1\n\t" | |
546 "paddusw %%mm6, %%mm3\n\t" | |
547 "psrlw $1, %%mm1\n\t" | |
548 "psrlw $1, %%mm3\n\t" | |
549 "paddusw %%mm6, %%mm0\n\t" | |
550 "paddusw %%mm6, %%mm2\n\t" | |
551 "paddusw %%mm1, %%mm0\n\t" | |
552 "paddusw %%mm3, %%mm2\n\t" | |
553 "psrlw $1, %%mm0\n\t" | |
554 "psrlw $1, %%mm2\n\t" | |
555 "packuswb %%mm2, %%mm0\n\t" | |
556 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
557 :"+m"(*p) |
0 | 558 :"m"(*pix), "m"(*(pix+line_size)) |
559 :"memory"); | |
560 pix += line_size; | |
561 p += line_size ; | |
562 } while(--h); | |
563 } | |
564 | |
565 static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
566 { | |
567 UINT8 *p; | |
568 const UINT8 *pix; | |
569 p = block; | |
570 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
571 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
572 // this doesn't seem to be used offten - so |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
573 // the inside usage of mm_wone is not optimized |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
574 MOVQ_WTWO(mm6); |
0 | 575 do { |
576 __asm __volatile( | |
577 "movq %1, %%mm0\n\t" | |
578 "movq %2, %%mm1\n\t" | |
579 "movq 1%1, %%mm4\n\t" | |
580 "movq 1%2, %%mm5\n\t" | |
581 "movq %%mm0, %%mm2\n\t" | |
582 "movq %%mm1, %%mm3\n\t" | |
583 "punpcklbw %%mm7, %%mm0\n\t" | |
584 "punpcklbw %%mm7, %%mm1\n\t" | |
585 "punpckhbw %%mm7, %%mm2\n\t" | |
586 "punpckhbw %%mm7, %%mm3\n\t" | |
587 "paddusw %%mm1, %%mm0\n\t" | |
588 "paddusw %%mm3, %%mm2\n\t" | |
589 "movq %%mm4, %%mm1\n\t" | |
590 "movq %%mm5, %%mm3\n\t" | |
591 "punpcklbw %%mm7, %%mm4\n\t" | |
592 "punpcklbw %%mm7, %%mm5\n\t" | |
593 "punpckhbw %%mm7, %%mm1\n\t" | |
594 "punpckhbw %%mm7, %%mm3\n\t" | |
595 "paddusw %%mm5, %%mm4\n\t" | |
596 "paddusw %%mm3, %%mm1\n\t" | |
597 "paddusw %%mm6, %%mm4\n\t" | |
598 "paddusw %%mm6, %%mm1\n\t" | |
599 "paddusw %%mm4, %%mm0\n\t" | |
600 "paddusw %%mm1, %%mm2\n\t" | |
601 "movq %3, %%mm5\n\t" | |
602 "psrlw $2, %%mm0\n\t" | |
603 "movq %0, %%mm1\n\t" | |
604 "psrlw $2, %%mm2\n\t" | |
605 "movq %%mm1, %%mm3\n\t" | |
606 "punpcklbw %%mm7, %%mm1\n\t" | |
607 "punpckhbw %%mm7, %%mm3\n\t" | |
608 "paddusw %%mm1, %%mm0\n\t" | |
609 "paddusw %%mm3, %%mm2\n\t" | |
610 "paddusw %%mm5, %%mm0\n\t" | |
611 "paddusw %%mm5, %%mm2\n\t" | |
612 "psrlw $1, %%mm0\n\t" | |
613 "psrlw $1, %%mm2\n\t" | |
614 "packuswb %%mm2, %%mm0\n\t" | |
615 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
616 :"+m"(*p) |
0 | 617 :"m"(*pix), |
8 | 618 "m"(*(pix+line_size)), "m"(mm_wone) |
0 | 619 :"memory"); |
620 pix += line_size; | |
621 p += line_size ; | |
622 } while(--h); | |
623 } | |
624 | |
625 static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
626 { | |
627 UINT8 *p; | |
628 const UINT8 *pix; | |
629 p = block; | |
630 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
631 MOVQ_ZERO(mm7); |
0 | 632 do { |
633 __asm __volatile( | |
634 "movq %1, %%mm0\n\t" | |
635 "movq %0, %%mm1\n\t" | |
636 "movq %%mm0, %%mm2\n\t" | |
637 "movq %%mm1, %%mm3\n\t" | |
638 "punpcklbw %%mm7, %%mm0\n\t" | |
639 "punpcklbw %%mm7, %%mm1\n\t" | |
640 "punpckhbw %%mm7, %%mm2\n\t" | |
641 "punpckhbw %%mm7, %%mm3\n\t" | |
642 "paddusw %%mm1, %%mm0\n\t" | |
643 "paddusw %%mm3, %%mm2\n\t" | |
644 "psrlw $1, %%mm0\n\t" | |
645 "psrlw $1, %%mm2\n\t" | |
646 "packuswb %%mm2, %%mm0\n\t" | |
647 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
648 :"+m"(*p) |
0 | 649 :"m"(*pix) |
650 :"memory"); | |
651 pix += line_size; | |
652 p += line_size ; | |
653 } while (--h); | |
654 } | |
655 | |
656 static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
657 { | |
658 UINT8 *p; | |
659 const UINT8 *pix; | |
660 p = block; | |
661 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
662 MOVQ_ZERO(mm7); |
0 | 663 do { |
664 __asm __volatile( | |
665 "movq %1, %%mm0\n\t" | |
666 "movq 1%1, %%mm1\n\t" | |
667 "movq %0, %%mm4\n\t" | |
668 "movq %%mm0, %%mm2\n\t" | |
669 "movq %%mm1, %%mm3\n\t" | |
670 "movq %%mm4, %%mm5\n\t" | |
671 "punpcklbw %%mm7, %%mm0\n\t" | |
672 "punpcklbw %%mm7, %%mm1\n\t" | |
673 "punpckhbw %%mm7, %%mm2\n\t" | |
674 "punpckhbw %%mm7, %%mm3\n\t" | |
675 "punpcklbw %%mm7, %%mm4\n\t" | |
676 "punpckhbw %%mm7, %%mm5\n\t" | |
677 "paddusw %%mm1, %%mm0\n\t" | |
678 "paddusw %%mm3, %%mm2\n\t" | |
679 "psrlw $1, %%mm0\n\t" | |
680 "psrlw $1, %%mm2\n\t" | |
681 "paddusw %%mm4, %%mm0\n\t" | |
682 "paddusw %%mm5, %%mm2\n\t" | |
683 "psrlw $1, %%mm0\n\t" | |
684 "psrlw $1, %%mm2\n\t" | |
685 "packuswb %%mm2, %%mm0\n\t" | |
686 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
687 :"+m"(*p) |
0 | 688 :"m"(*pix) |
689 :"memory"); | |
690 pix += line_size; | |
691 p += line_size; | |
692 } while (--h); | |
693 } | |
694 | |
695 static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
696 { | |
697 UINT8 *p; | |
698 const UINT8 *pix; | |
699 p = block; | |
700 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
701 MOVQ_ZERO(mm7); |
0 | 702 do { |
703 __asm __volatile( | |
704 "movq %1, %%mm0\n\t" | |
705 "movq %2, %%mm1\n\t" | |
706 "movq %0, %%mm4\n\t" | |
707 "movq %%mm0, %%mm2\n\t" | |
708 "movq %%mm1, %%mm3\n\t" | |
709 "movq %%mm4, %%mm5\n\t" | |
710 "punpcklbw %%mm7, %%mm0\n\t" | |
711 "punpcklbw %%mm7, %%mm1\n\t" | |
712 "punpckhbw %%mm7, %%mm2\n\t" | |
713 "punpckhbw %%mm7, %%mm3\n\t" | |
714 "punpcklbw %%mm7, %%mm4\n\t" | |
715 "punpckhbw %%mm7, %%mm5\n\t" | |
716 "paddusw %%mm1, %%mm0\n\t" | |
717 "paddusw %%mm3, %%mm2\n\t" | |
718 "psrlw $1, %%mm0\n\t" | |
719 "psrlw $1, %%mm2\n\t" | |
720 "paddusw %%mm4, %%mm0\n\t" | |
721 "paddusw %%mm5, %%mm2\n\t" | |
722 "psrlw $1, %%mm0\n\t" | |
723 "psrlw $1, %%mm2\n\t" | |
724 "packuswb %%mm2, %%mm0\n\t" | |
725 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
726 :"+m"(*p) |
0 | 727 :"m"(*pix), "m"(*(pix+line_size)) |
728 :"memory"); | |
729 pix += line_size; | |
730 p += line_size ; | |
731 } while(--h); | |
732 } | |
733 | |
734 static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
735 { | |
736 UINT8 *p; | |
737 const UINT8 *pix; | |
738 p = block; | |
739 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
740 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
741 MOVQ_WONE(mm6); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
742 JUMPALIGN(); |
0 | 743 do { |
744 __asm __volatile( | |
745 "movq %1, %%mm0\n\t" | |
746 "movq %2, %%mm1\n\t" | |
747 "movq 1%1, %%mm4\n\t" | |
748 "movq 1%2, %%mm5\n\t" | |
749 "movq %%mm0, %%mm2\n\t" | |
750 "movq %%mm1, %%mm3\n\t" | |
751 "punpcklbw %%mm7, %%mm0\n\t" | |
752 "punpcklbw %%mm7, %%mm1\n\t" | |
753 "punpckhbw %%mm7, %%mm2\n\t" | |
754 "punpckhbw %%mm7, %%mm3\n\t" | |
755 "paddusw %%mm1, %%mm0\n\t" | |
756 "paddusw %%mm3, %%mm2\n\t" | |
757 "movq %%mm4, %%mm1\n\t" | |
758 "movq %%mm5, %%mm3\n\t" | |
759 "punpcklbw %%mm7, %%mm4\n\t" | |
760 "punpcklbw %%mm7, %%mm5\n\t" | |
761 "punpckhbw %%mm7, %%mm1\n\t" | |
762 "punpckhbw %%mm7, %%mm3\n\t" | |
763 "paddusw %%mm5, %%mm4\n\t" | |
764 "paddusw %%mm3, %%mm1\n\t" | |
765 "paddusw %%mm6, %%mm4\n\t" | |
766 "paddusw %%mm6, %%mm1\n\t" | |
767 "paddusw %%mm4, %%mm0\n\t" | |
768 "paddusw %%mm1, %%mm2\n\t" | |
769 "movq %0, %%mm1\n\t" | |
770 "psrlw $2, %%mm0\n\t" | |
771 "movq %%mm1, %%mm3\n\t" | |
772 "psrlw $2, %%mm2\n\t" | |
773 "punpcklbw %%mm7, %%mm1\n\t" | |
774 "punpckhbw %%mm7, %%mm3\n\t" | |
775 "paddusw %%mm1, %%mm0\n\t" | |
776 "paddusw %%mm3, %%mm2\n\t" | |
777 "psrlw $1, %%mm0\n\t" | |
778 "psrlw $1, %%mm2\n\t" | |
779 "packuswb %%mm2, %%mm0\n\t" | |
780 "movq %%mm0, %0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
781 :"+m"(*p) |
0 | 782 :"m"(*pix), |
783 "m"(*(pix+line_size)) | |
784 :"memory"); | |
785 pix += line_size; | |
786 p += line_size; | |
787 } while(--h); | |
788 } | |
789 | |
296 | 790 static void clear_blocks_mmx(DCTELEM *blocks) |
791 { | |
792 asm volatile( | |
793 "pxor %%mm7, %%mm7 \n\t" | |
794 "movl $-128*6, %%eax \n\t" | |
795 "1: \n\t" | |
796 "movq %%mm7, (%0, %%eax) \n\t" | |
797 "movq %%mm7, 8(%0, %%eax) \n\t" | |
798 "movq %%mm7, 16(%0, %%eax) \n\t" | |
799 "movq %%mm7, 24(%0, %%eax) \n\t" | |
800 "addl $32, %%eax \n\t" | |
801 " js 1b \n\t" | |
802 : : "r" (((int)blocks)+128*6) | |
803 : "%eax" | |
804 ); | |
805 } | |
806 | |
#if 0
/* Do-nothing stub that the (also disabled) speed-testing section of
   dsputil_init_mmx() installs in place of the real routines. */
static void just_return() { }
#endif
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
810 |
0 | 811 void dsputil_init_mmx(void) |
812 { | |
813 mm_flags = mm_support(); | |
188 | 814 #if 1 |
815 printf("libavcodec: CPU flags:"); | |
0 | 816 if (mm_flags & MM_MMX) |
817 printf(" mmx"); | |
818 if (mm_flags & MM_MMXEXT) | |
819 printf(" mmxext"); | |
820 if (mm_flags & MM_3DNOW) | |
821 printf(" 3dnow"); | |
822 if (mm_flags & MM_SSE) | |
823 printf(" sse"); | |
824 if (mm_flags & MM_SSE2) | |
825 printf(" sse2"); | |
826 printf("\n"); | |
827 #endif | |
828 | |
829 if (mm_flags & MM_MMX) { | |
830 get_pixels = get_pixels_mmx; | |
324 | 831 diff_pixels = diff_pixels_mmx; |
0 | 832 put_pixels_clamped = put_pixels_clamped_mmx; |
833 add_pixels_clamped = add_pixels_clamped_mmx; | |
296 | 834 clear_blocks= clear_blocks_mmx; |
415 | 835 |
294 | 836 pix_abs16x16 = pix_abs16x16_mmx; |
837 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | |
838 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | |
0 | 839 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
294 | 840 pix_abs8x8 = pix_abs8x8_mmx; |
841 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |
842 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |
843 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |
0 | 844 av_fdct = fdct_mmx; |
845 | |
846 put_pixels_tab[0] = put_pixels_mmx; | |
847 put_pixels_tab[1] = put_pixels_x2_mmx; | |
848 put_pixels_tab[2] = put_pixels_y2_mmx; | |
849 put_pixels_tab[3] = put_pixels_xy2_mmx; | |
850 | |
851 put_no_rnd_pixels_tab[0] = put_pixels_mmx; | |
852 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
853 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
854 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; | |
415 | 855 |
0 | 856 avg_pixels_tab[0] = avg_pixels_mmx; |
857 avg_pixels_tab[1] = avg_pixels_x2_mmx; | |
858 avg_pixels_tab[2] = avg_pixels_y2_mmx; | |
859 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
860 | |
861 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; | |
862 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; | |
863 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; | |
864 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; | |
386 | 865 |
0 | 866 if (mm_flags & MM_MMXEXT) { |
294 | 867 pix_abs16x16 = pix_abs16x16_mmx2; |
868 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | |
869 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | |
870 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | |
415 | 871 |
294 | 872 pix_abs8x8 = pix_abs8x8_mmx2; |
873 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | |
874 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | |
875 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | |
386 | 876 |
877 put_pixels_tab[1] = put_pixels_x2_mmx2; | |
878 put_pixels_tab[2] = put_pixels_y2_mmx2; | |
879 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2; | |
880 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2; | |
415 | 881 |
386 | 882 avg_pixels_tab[0] = avg_pixels_mmx2; |
883 avg_pixels_tab[1] = avg_pixels_x2_mmx2; | |
884 avg_pixels_tab[2] = avg_pixels_y2_mmx2; | |
885 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; | |
0 | 886 } else if (mm_flags & MM_3DNOW) { |
887 put_pixels_tab[1] = put_pixels_x2_3dnow; | |
888 put_pixels_tab[2] = put_pixels_y2_3dnow; | |
386 | 889 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow; |
890 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow; | |
393 | 891 |
0 | 892 avg_pixels_tab[0] = avg_pixels_3dnow; |
893 avg_pixels_tab[1] = avg_pixels_x2_3dnow; | |
894 avg_pixels_tab[2] = avg_pixels_y2_3dnow; | |
895 avg_pixels_tab[3] = avg_pixels_xy2_3dnow; | |
896 } | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
8
diff
changeset
|
897 |
42 | 898 /* idct */ |
899 if (mm_flags & MM_MMXEXT) { | |
900 ff_idct = ff_mmxext_idct; | |
901 } else { | |
902 ff_idct = ff_mmx_idct; | |
903 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
904 #ifdef SIMPLE_IDCT |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
905 // ff_idct = simple_idct; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
906 ff_idct = simple_idct_mmx; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
151
diff
changeset
|
907 #endif |
0 | 908 } |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
909 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
910 #if 0 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
911 // for speed testing |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
912 get_pixels = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
913 put_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
914 add_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
915 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
916 pix_abs16x16 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
917 pix_abs16x16_x2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
918 pix_abs16x16_y2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
919 pix_abs16x16_xy2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
920 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
921 put_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
922 put_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
923 put_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
924 put_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
925 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
926 put_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
927 put_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
928 put_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
929 put_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
930 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
931 avg_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
932 avg_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
933 avg_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
934 avg_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
935 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
936 avg_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
937 avg_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
938 avg_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
939 avg_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
940 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
941 //av_fdct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
942 //ff_idct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
943 #endif |
0 | 944 } |
402 | 945 |
946 /* remove any non bit exact operation (testing purpose). NOTE that | |
947 this function should be kept as small as possible because it is | |
948 always difficult to test automatically non bit exact cases. */ | |
949 void dsputil_set_bit_exact_mmx(void) | |
950 { | |
951 if (mm_flags & MM_MMX) { | |
952 if (mm_flags & MM_MMXEXT) { | |
953 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
954 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
955 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
956 } else if (mm_flags & MM_3DNOW) { | |
957 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
958 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
959 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
960 } | |
961 } | |
962 } |