Mercurial > libavcodec.hg
annotate i386/dsputil_mmx.c @ 864:6eda806efda9 libavcodec
* not checking for get_buffer_callback - it would be user's error
* if DR1 is disabled in get_buffer_callback silently fallback to
non-dr rendering
author | kabi |
---|---|
date | Thu, 14 Nov 2002 18:43:49 +0000 |
parents | eacc2dd8fd9d |
children | 725ef4ea3ecc |
rev | line source |
---|---|
0 | 1 /* |
2 * MMX optimized DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
0 | 18 * |
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
20 */ | |
21 | |
22 #include "../dsputil.h" | |
23 | |
5 | 24 int mm_flags; /* multimedia extension flags */ |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
25 /* FIXME use them in static form */ |
294 | 26 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
27 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
28 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
29 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
30 | |
31 int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
32 int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
33 int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
34 int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
35 | |
36 int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
37 int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
38 int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
39 int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
40 | |
41 int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
42 int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
43 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
44 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
45 | |
0 | 46 /* pixel operations */ |
387 | 47 static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; |
48 static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; | |
49 static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL; | |
0 | 50 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
51 #define JUMPALIGN() __asm __volatile (".balign 8"::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
52 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
53 |
448 | 54 #define MOVQ_WONE(regd) \ |
55 __asm __volatile ( \ | |
56 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ | |
57 "psrlw $15, %%" #regd ::) | |
58 | |
59 #define MOVQ_BFE(regd) \ | |
60 __asm __volatile ( \ | |
61 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ | |
62 "paddb %%" #regd ", %%" #regd " \n\t" ::) | |
63 | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
64 #ifndef PIC |
448 | 65 #define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone)) |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
66 #define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
67 #else |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
68 // for shared library it's better to use this way for accessing constants |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
69 // pcmpeqd -> -1 |
448 | 70 #define MOVQ_BONE(regd) \ |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
71 __asm __volatile ( \ |
448 | 72 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
73 "psrlw $15, %%" #regd " \n\t" \ | |
74 "packuswb %%" #regd ", %%" #regd " \n\t" ::) | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
75 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
76 #define MOVQ_WTWO(regd) \ |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
77 __asm __volatile ( \ |
448 | 78 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ |
79 "psrlw $15, %%" #regd " \n\t" \ | |
80 "psllw $1, %%" #regd " \n\t"::) | |
387 | 81 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
82 #endif |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
83 |
448 | 84 // using regr as temporary and for the output result |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
85 // first argument is unmodified and second is trashed |
471 | 86 // regfe is supposed to contain 0xfefefefefefefefe |
87 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \ | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
88 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
89 "pand " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
90 "pxor " #rega ", " #regb " \n\t"\ |
471 | 91 "pand " #regfe "," #regb " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
92 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
93 "paddb " #regb ", " #regr " \n\t" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
94 |
471 | 95 #define PAVGB_MMX(rega, regb, regr, regfe) \ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
96 "movq " #rega ", " #regr " \n\t"\ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
97 "por " #regb ", " #regr " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
98 "pxor " #rega ", " #regb " \n\t"\ |
471 | 99 "pand " #regfe "," #regb " \n\t"\ |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
100 "psrlq $1, " #regb " \n\t"\ |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
101 "psubb " #regb ", " #regr " \n\t" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
102 |
471 | 103 // mm6 is supposed to contain 0xfefefefefefefefe |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
104 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
105 "movq " #rega ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
106 "movq " #regc ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
107 "pand " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
108 "pand " #regd ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
109 "pxor " #rega ", " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
110 "pxor " #regc ", " #regd " \n\t"\ |
448 | 111 "pand %%mm6, " #regb " \n\t"\ |
112 "pand %%mm6, " #regd " \n\t"\ | |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
113 "psrlq $1, " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
114 "psrlq $1, " #regd " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
115 "paddb " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
116 "paddb " #regd ", " #regp " \n\t" |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
117 |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
118 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
119 "movq " #rega ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
120 "movq " #regc ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
121 "por " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
122 "por " #regd ", " #regp " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
123 "pxor " #rega ", " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
124 "pxor " #regc ", " #regd " \n\t"\ |
448 | 125 "pand %%mm6, " #regb " \n\t"\ |
126 "pand %%mm6, " #regd " \n\t"\ | |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
127 "psrlq $1, " #regd " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
128 "psrlq $1, " #regb " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
129 "psubb " #regb ", " #regr " \n\t"\ |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
130 "psubb " #regd ", " #regp " \n\t" |
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
131 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
132 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
133 /* MMX no rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
134 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx |
448 | 135 #define SET_RND MOVQ_WONE |
136 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) | |
471 | 137 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
138 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
139 #include "dsputil_mmx_rnd.h" |
444
a5edef76dac6
* new mmx code - based upon http://aggregate.org/MAGIC
kabi
parents:
438
diff
changeset
|
140 |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
141 #undef DEF |
448 | 142 #undef SET_RND |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
143 #undef PAVGBP |
471 | 144 #undef PAVGB |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
145 /***********************************/ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
146 /* MMX rounding */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
147 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
148 #define DEF(x, y) x ## _ ## y ##_mmx |
448 | 149 #define SET_RND MOVQ_WTWO |
150 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) | |
471 | 151 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
152 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
153 #include "dsputil_mmx_rnd.h" |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
154 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
444
diff
changeset
|
155 #undef DEF |
448 | 156 #undef SET_RND |
446
efe0c0d40577
* reenabled original xy2 put routine - rounding error is really bad with
kabi
parents:
445
diff
changeset
|
157 #undef PAVGBP |
471 | 158 #undef PAVGB |
387 | 159 |
0 | 160 /***********************************/ |
161 /* 3Dnow specific */ | |
162 | |
163 #define DEF(x) x ## _3dnow | |
164 /* for Athlons PAVGUSB is preferred */ | |
165 #define PAVGB "pavgusb" | |
166 | |
167 #include "dsputil_mmx_avg.h" | |
168 | |
169 #undef DEF | |
170 #undef PAVGB | |
171 | |
172 /***********************************/ | |
173 /* MMX2 specific */ | |
174 | |
386 | 175 #define DEF(x) x ## _mmx2 |
0 | 176 |
177 /* Introduced only in MMX2 set */ | |
178 #define PAVGB "pavgb" | |
179 | |
180 #include "dsputil_mmx_avg.h" | |
181 | |
182 #undef DEF | |
183 #undef PAVGB | |
184 | |
185 /***********************************/ | |
186 /* standard MMX */ | |
187 | |
188 static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) | |
189 { | |
386 | 190 asm volatile( |
191 "movl $-128, %%eax \n\t" | |
192 "pxor %%mm7, %%mm7 \n\t" | |
193 ".balign 16 \n\t" | |
194 "1: \n\t" | |
195 "movq (%0), %%mm0 \n\t" | |
196 "movq (%0, %2), %%mm2 \n\t" | |
197 "movq %%mm0, %%mm1 \n\t" | |
198 "movq %%mm2, %%mm3 \n\t" | |
199 "punpcklbw %%mm7, %%mm0 \n\t" | |
200 "punpckhbw %%mm7, %%mm1 \n\t" | |
201 "punpcklbw %%mm7, %%mm2 \n\t" | |
202 "punpckhbw %%mm7, %%mm3 \n\t" | |
203 "movq %%mm0, (%1, %%eax)\n\t" | |
204 "movq %%mm1, 8(%1, %%eax)\n\t" | |
205 "movq %%mm2, 16(%1, %%eax)\n\t" | |
206 "movq %%mm3, 24(%1, %%eax)\n\t" | |
207 "addl %3, %0 \n\t" | |
208 "addl $32, %%eax \n\t" | |
209 "js 1b \n\t" | |
210 : "+r" (pixels) | |
211 : "r" (block+64), "r" (line_size), "r" (line_size*2) | |
212 : "%eax" | |
213 ); | |
0 | 214 } |
215 | |
324 | 216 static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) |
217 { | |
218 asm volatile( | |
386 | 219 "pxor %%mm7, %%mm7 \n\t" |
220 "movl $-128, %%eax \n\t" | |
324 | 221 ".balign 16 \n\t" |
222 "1: \n\t" | |
223 "movq (%0), %%mm0 \n\t" | |
224 "movq (%1), %%mm2 \n\t" | |
225 "movq %%mm0, %%mm1 \n\t" | |
226 "movq %%mm2, %%mm3 \n\t" | |
227 "punpcklbw %%mm7, %%mm0 \n\t" | |
228 "punpckhbw %%mm7, %%mm1 \n\t" | |
229 "punpcklbw %%mm7, %%mm2 \n\t" | |
230 "punpckhbw %%mm7, %%mm3 \n\t" | |
231 "psubw %%mm2, %%mm0 \n\t" | |
232 "psubw %%mm3, %%mm1 \n\t" | |
233 "movq %%mm0, (%2, %%eax)\n\t" | |
234 "movq %%mm1, 8(%2, %%eax)\n\t" | |
235 "addl %3, %0 \n\t" | |
236 "addl %3, %1 \n\t" | |
237 "addl $16, %%eax \n\t" | |
238 "jnz 1b \n\t" | |
239 : "+r" (s1), "+r" (s2) | |
240 : "r" (block+64), "r" (stride) | |
241 : "%eax" | |
242 ); | |
243 } | |
244 | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
245 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
0 | 246 { |
247 const DCTELEM *p; | |
248 UINT8 *pix; | |
249 | |
250 /* read the pixels */ | |
251 p = block; | |
252 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
253 /* unrolled loop */ |
0 | 254 __asm __volatile( |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
255 "movq %3, %%mm0\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
256 "movq 8%3, %%mm1\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
257 "movq 16%3, %%mm2\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
258 "movq 24%3, %%mm3\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
259 "movq 32%3, %%mm4\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
260 "movq 40%3, %%mm5\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
261 "movq 48%3, %%mm6\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
262 "movq 56%3, %%mm7\n\t" |
0 | 263 "packuswb %%mm1, %%mm0\n\t" |
264 "packuswb %%mm3, %%mm2\n\t" | |
265 "packuswb %%mm5, %%mm4\n\t" | |
266 "packuswb %%mm7, %%mm6\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
267 "movq %%mm0, (%0)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
268 "movq %%mm2, (%0, %1)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
269 "movq %%mm4, (%0, %1, 2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
270 "movq %%mm6, (%0, %2)\n\t" |
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
271 ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p) |
0 | 272 :"memory"); |
273 pix += line_size*4; | |
274 p += 32; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
275 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
276 // if here would be an exact copy of the code above |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
277 // compiler would generate some very strange code |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
278 // thus using "r" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
279 __asm __volatile( |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
280 "movq (%3), %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
281 "movq 8(%3), %%mm1\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
282 "movq 16(%3), %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
283 "movq 24(%3), %%mm3\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
284 "movq 32(%3), %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
285 "movq 40(%3), %%mm5\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
286 "movq 48(%3), %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
287 "movq 56(%3), %%mm7\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
288 "packuswb %%mm1, %%mm0\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
289 "packuswb %%mm3, %%mm2\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
290 "packuswb %%mm5, %%mm4\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
291 "packuswb %%mm7, %%mm6\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
292 "movq %%mm0, (%0)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
293 "movq %%mm2, (%0, %1)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
294 "movq %%mm4, (%0, %1, 2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
295 "movq %%mm6, (%0, %2)\n\t" |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
296 ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
297 :"memory"); |
0 | 298 } |
299 | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
300 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) |
0 | 301 { |
302 const DCTELEM *p; | |
303 UINT8 *pix; | |
304 int i; | |
305 | |
306 /* read the pixels */ | |
307 p = block; | |
308 pix = pixels; | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
309 MOVQ_ZERO(mm7); |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
310 i = 4; |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
311 do { |
0 | 312 __asm __volatile( |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
313 "movq (%2), %%mm0\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
314 "movq 8(%2), %%mm1\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
315 "movq 16(%2), %%mm2\n\t" |
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
316 "movq 24(%2), %%mm3\n\t" |
0 | 317 "movq %0, %%mm4\n\t" |
318 "movq %1, %%mm6\n\t" | |
319 "movq %%mm4, %%mm5\n\t" | |
320 "punpcklbw %%mm7, %%mm4\n\t" | |
321 "punpckhbw %%mm7, %%mm5\n\t" | |
322 "paddsw %%mm4, %%mm0\n\t" | |
323 "paddsw %%mm5, %%mm1\n\t" | |
324 "movq %%mm6, %%mm5\n\t" | |
325 "punpcklbw %%mm7, %%mm6\n\t" | |
326 "punpckhbw %%mm7, %%mm5\n\t" | |
327 "paddsw %%mm6, %%mm2\n\t" | |
328 "paddsw %%mm5, %%mm3\n\t" | |
329 "packuswb %%mm1, %%mm0\n\t" | |
330 "packuswb %%mm3, %%mm2\n\t" | |
331 "movq %%mm0, %0\n\t" | |
332 "movq %%mm2, %1\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
42
diff
changeset
|
333 :"+m"(*pix), "+m"(*(pix+line_size)) |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
334 :"r"(p) |
0 | 335 :"memory"); |
336 pix += line_size*2; | |
337 p += 16; | |
342
8635a7036395
* fixes problem with -funroll-loops and buggy gcc compiler
kabi
parents:
324
diff
changeset
|
338 } while (--i); |
0 | 339 } |
340 | |
651 | 341 static void put_pixels8_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
0 | 342 { |
471 | 343 __asm __volatile( |
420 | 344 "lea (%3, %3), %%eax \n\t" |
422 | 345 ".balign 8 \n\t" |
420 | 346 "1: \n\t" |
347 "movq (%1), %%mm0 \n\t" | |
348 "movq (%1, %3), %%mm1 \n\t" | |
349 "movq %%mm0, (%2) \n\t" | |
350 "movq %%mm1, (%2, %3) \n\t" | |
351 "addl %%eax, %1 \n\t" | |
352 "addl %%eax, %2 \n\t" | |
353 "movq (%1), %%mm0 \n\t" | |
354 "movq (%1, %3), %%mm1 \n\t" | |
355 "movq %%mm0, (%2) \n\t" | |
356 "movq %%mm1, (%2, %3) \n\t" | |
357 "addl %%eax, %1 \n\t" | |
358 "addl %%eax, %2 \n\t" | |
359 "subl $4, %0 \n\t" | |
360 "jnz 1b \n\t" | |
361 : "+g"(h), "+r" (pixels), "+r" (block) | |
362 : "r"(line_size) | |
363 : "%eax", "memory" | |
364 ); | |
0 | 365 } |
366 | |
651 | 367 static void put_pixels16_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
368 { | |
369 __asm __volatile( | |
370 "lea (%3, %3), %%eax \n\t" | |
371 ".balign 8 \n\t" | |
372 "1: \n\t" | |
373 "movq (%1), %%mm0 \n\t" | |
374 "movq 8(%1), %%mm4 \n\t" | |
375 "movq (%1, %3), %%mm1 \n\t" | |
376 "movq 8(%1, %3), %%mm5 \n\t" | |
377 "movq %%mm0, (%2) \n\t" | |
378 "movq %%mm4, 8(%2) \n\t" | |
379 "movq %%mm1, (%2, %3) \n\t" | |
380 "movq %%mm5, 8(%2, %3) \n\t" | |
381 "addl %%eax, %1 \n\t" | |
382 "addl %%eax, %2 \n\t" | |
383 "movq (%1), %%mm0 \n\t" | |
384 "movq 8(%1), %%mm4 \n\t" | |
385 "movq (%1, %3), %%mm1 \n\t" | |
386 "movq 8(%1, %3), %%mm5 \n\t" | |
387 "movq %%mm0, (%2) \n\t" | |
388 "movq %%mm4, 8(%2) \n\t" | |
389 "movq %%mm1, (%2, %3) \n\t" | |
390 "movq %%mm5, 8(%2, %3) \n\t" | |
391 "addl %%eax, %1 \n\t" | |
392 "addl %%eax, %2 \n\t" | |
393 "subl $4, %0 \n\t" | |
394 "jnz 1b \n\t" | |
395 : "+g"(h), "+r" (pixels), "+r" (block) | |
396 : "r"(line_size) | |
397 : "%eax", "memory" | |
398 ); | |
399 } | |
400 | |
296 | 401 static void clear_blocks_mmx(DCTELEM *blocks) |
402 { | |
471 | 403 __asm __volatile( |
296 | 404 "pxor %%mm7, %%mm7 \n\t" |
405 "movl $-128*6, %%eax \n\t" | |
406 "1: \n\t" | |
407 "movq %%mm7, (%0, %%eax) \n\t" | |
408 "movq %%mm7, 8(%0, %%eax) \n\t" | |
409 "movq %%mm7, 16(%0, %%eax) \n\t" | |
410 "movq %%mm7, 24(%0, %%eax) \n\t" | |
411 "addl $32, %%eax \n\t" | |
412 " js 1b \n\t" | |
413 : : "r" (((int)blocks)+128*6) | |
414 : "%eax" | |
415 ); | |
416 } | |
417 | |
688 | 418 static int pix_sum16_mmx(UINT8 * pix, int line_size){ |
419 const int h=16; | |
420 int sum; | |
421 int index= -line_size*h; | |
422 | |
423 __asm __volatile( | |
424 "pxor %%mm7, %%mm7 \n\t" | |
425 "pxor %%mm6, %%mm6 \n\t" | |
426 "1: \n\t" | |
427 "movq (%2, %1), %%mm0 \n\t" | |
428 "movq (%2, %1), %%mm1 \n\t" | |
429 "movq 8(%2, %1), %%mm2 \n\t" | |
430 "movq 8(%2, %1), %%mm3 \n\t" | |
431 "punpcklbw %%mm7, %%mm0 \n\t" | |
432 "punpckhbw %%mm7, %%mm1 \n\t" | |
433 "punpcklbw %%mm7, %%mm2 \n\t" | |
434 "punpckhbw %%mm7, %%mm3 \n\t" | |
435 "paddw %%mm0, %%mm1 \n\t" | |
436 "paddw %%mm2, %%mm3 \n\t" | |
437 "paddw %%mm1, %%mm3 \n\t" | |
438 "paddw %%mm3, %%mm6 \n\t" | |
439 "addl %3, %1 \n\t" | |
440 " js 1b \n\t" | |
441 "movq %%mm6, %%mm5 \n\t" | |
442 "psrlq $32, %%mm6 \n\t" | |
443 "paddw %%mm5, %%mm6 \n\t" | |
444 "movq %%mm6, %%mm5 \n\t" | |
445 "psrlq $16, %%mm6 \n\t" | |
446 "paddw %%mm5, %%mm6 \n\t" | |
447 "movd %%mm6, %0 \n\t" | |
448 "andl $0xFFFF, %0 \n\t" | |
449 : "=&r" (sum), "+r" (index) | |
450 : "r" (pix - index), "r" (line_size) | |
451 ); | |
452 | |
453 return sum; | |
454 } | |
455 | |
393 | 456 #if 0 |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
457 static void just_return() { return; } |
393 | 458 #endif |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
459 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
460 void dsputil_init_mmx(DSPContext* c, unsigned mask) |
0 | 461 { |
462 mm_flags = mm_support(); | |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
463 #if 0 |
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
464 fprintf(stderr, "libavcodec: CPU flags:"); |
0 | 465 if (mm_flags & MM_MMX) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
466 fprintf(stderr, " mmx"); |
0 | 467 if (mm_flags & MM_MMXEXT) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
468 fprintf(stderr, " mmxext"); |
0 | 469 if (mm_flags & MM_3DNOW) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
470 fprintf(stderr, " 3dnow"); |
0 | 471 if (mm_flags & MM_SSE) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
472 fprintf(stderr, " sse"); |
0 | 473 if (mm_flags & MM_SSE2) |
631
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
474 fprintf(stderr, " sse2"); |
47a8964ba5cd
be less verbose patch by (Lennert Buytenhek <buytenh at math dot leidenuniv dot nl>)
michaelni
parents:
629
diff
changeset
|
475 fprintf(stderr, "\n"); |
0 | 476 #endif |
477 | |
478 if (mm_flags & MM_MMX) { | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
479 c->get_pixels = get_pixels_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
480 c->diff_pixels = diff_pixels_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
481 c->put_pixels_clamped = put_pixels_clamped_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
482 c->add_pixels_clamped = add_pixels_clamped_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
483 c->clear_blocks = clear_blocks_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
484 c->pix_sum = pix_sum16_mmx; |
415 | 485 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
486 c->pix_abs16x16 = pix_abs16x16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
487 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
488 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
489 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
490 c->pix_abs8x8 = pix_abs8x8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
491 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
492 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
493 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; |
574 | 494 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
495 c->put_pixels_tab[0][0] = put_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
496 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
497 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
498 c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; |
0 | 499 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
500 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
501 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
502 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
503 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; |
651 | 504 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
505 c->avg_pixels_tab[0][0] = avg_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
506 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
507 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
508 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
415 | 509 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
510 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
511 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
512 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
513 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
514 |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
515 c->put_pixels_tab[1][0] = put_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
516 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
517 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
518 c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; |
0 | 519 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
520 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
521 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
522 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
523 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; |
651 | 524 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
525 c->avg_pixels_tab[1][0] = avg_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
526 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
527 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
528 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
651 | 529 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
530 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
531 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
532 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
533 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
386 | 534 |
0 | 535 if (mm_flags & MM_MMXEXT) { |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
536 c->pix_abs16x16 = pix_abs16x16_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
537 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
538 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
539 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; |
415 | 540 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
541 c->pix_abs8x8 = pix_abs8x8_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
542 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
543 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
544 c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; |
386 | 545 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
546 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
547 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
548 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
549 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
651 | 550 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
551 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
552 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
553 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
554 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; |
415 | 555 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
556 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
557 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
558 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
559 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; |
651 | 560 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
561 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
562 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
563 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
564 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; |
0 | 565 } else if (mm_flags & MM_3DNOW) { |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
566 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
567 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
568 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
569 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; |
393 | 570 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
571 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
572 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
573 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
574 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; |
651 | 575 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
576 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
577 c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
578 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
579 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
580 |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
581 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
582 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
583 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
584 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; |
0 | 585 } |
586 } | |
247
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
587 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
588 #if 0 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
589 // for speed testing |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
590 get_pixels = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
591 put_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
592 add_pixels_clamped = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
593 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
594 pix_abs16x16 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
595 pix_abs16x16_x2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
596 pix_abs16x16_y2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
597 pix_abs16x16_xy2 = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
598 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
599 put_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
600 put_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
601 put_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
602 put_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
603 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
604 put_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
605 put_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
606 put_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
607 put_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
608 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
609 avg_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
610 avg_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
611 avg_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
612 avg_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
613 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
614 avg_no_rnd_pixels_tab[0] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
615 avg_no_rnd_pixels_tab[1] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
616 avg_no_rnd_pixels_tab[2] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
617 avg_no_rnd_pixels_tab[3] = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
618 |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
619 //av_fdct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
620 //ff_idct = just_return; |
6f48cacd9ed9
* some modifications to allow gcc to compile same code for -fPIC
kabi
parents:
188
diff
changeset
|
621 #endif |
0 | 622 } |
402 | 623 |
624 /* remove any non bit exact operation (testing purpose). NOTE that | |
625 this function should be kept as small as possible because it is | |
626 always difficult to test automatically non bit exact cases. */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
627 void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) |
402 | 628 { |
629 if (mm_flags & MM_MMX) { | |
651 | 630 /* MMX2 & 3DNOW */ |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
631 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
632 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
633 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
634 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
635 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
636 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; |
651 | 637 |
402 | 638 if (mm_flags & MM_MMXEXT) { |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
639 c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
640 c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
641 c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
642 c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
643 c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
706
diff
changeset
|
644 c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; |
402 | 645 } |
646 } | |
647 } |