Mercurial > libavcodec.hg
annotate i386/dsputil_mmx_avg.h @ 394:e2cb8a4ee0c5 libavcodec
proper memory handling functions
author | glantau |
---|---|
date | Sat, 18 May 2002 22:59:50 +0000 |
parents | f874493a1970 |
children | 92d143c2d5a8 |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils : average functions are compiled twice for 3dnow/mmx2 | |
3 * Copyright (c) 2000, 2001 Gerard Lantau. | |
386 | 4 * Copyright (c) 2002 Michael Niedermayer |
0 | 5 * |
6 * This program is free software; you can redistribute it and/or modify | |
7 * it under the terms of the GNU General Public License as published by | |
8 * the Free Software Foundation; either version 2 of the License, or | |
9 * (at your option) any later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 * GNU General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU General Public License | |
17 * along with this program; if not, write to the Free Software | |
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
19 * | |
20 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
386 | 21 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> |
0 | 22 */ |
387 | 23 |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
24 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
25 clobber bug */ |
/*
 * put_pixels_x2: store into block the PAVGB combination of every source byte
 * with its right-hand neighbour (byte x and byte x+1 of the same row).
 *
 * Each movq moves 8 bytes, so 8 output bytes are written per row and source
 * bytes 0..8 of each row are read. One loop pass handles four rows (two rows,
 * advance by 2*line_size, two more rows) and then subtracts 4 from h; the
 * loop only ends when h reaches exactly zero, so h has to be a positive
 * multiple of 4.
 *
 * PAVGB is a macro that is not defined in this part of the file; presumably
 * it is the packed unsigned byte average of the target CPU (pavgb/pavgusb)
 * - confirm against the including file.
 * mm0-mm3 are overwritten but are not named in the clobber list.
 */
0 | 26 static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
27 { | |
386 | 28 __asm __volatile( |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size, %3 = block,
   %4 = block + line_size, %5 = 2 * line_size; eax = running byte offset. */
29 "xorl %%eax, %%eax \n\t" | |
30 ".balign 16 \n\t" | |
31 "1: \n\t" | |
32 "movq (%1, %%eax), %%mm0 \n\t" | |
33 "movq 1(%1, %%eax), %%mm1 \n\t" | |
34 "movq (%2, %%eax), %%mm2 \n\t" | |
35 "movq 1(%2, %%eax), %%mm3 \n\t" | |
36 PAVGB" %%mm1, %%mm0 \n\t" | |
37 PAVGB" %%mm3, %%mm2 \n\t" | |
38 "movq %%mm0, (%3, %%eax) \n\t" | |
39 "movq %%mm2, (%4, %%eax) \n\t" | |
40 "addl %5, %%eax \n\t" | |
41 "movq (%1, %%eax), %%mm0 \n\t" | |
42 "movq 1(%1, %%eax), %%mm1 \n\t" | |
43 "movq (%2, %%eax), %%mm2 \n\t" | |
44 "movq 1(%2, %%eax), %%mm3 \n\t" | |
45 PAVGB" %%mm1, %%mm0 \n\t" | |
46 PAVGB" %%mm3, %%mm2 \n\t" | |
47 "movq %%mm0, (%3, %%eax) \n\t" | |
48 "movq %%mm2, (%4, %%eax) \n\t" | |
49 "addl %5, %%eax \n\t" | |
50 "subl $4, %0 \n\t" | |
51 " jnz 1b \n\t" | |
52 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
53 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
54 "D"(line_size<<1) |
386 | 55 :"%eax", "memory"); |
56 } | |
57 | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
58 /* GL: this function does incorrect rounding if overflow */ |
/*
 * put_no_rnd_pixels_x2: same data flow as put_pixels_x2 (byte x combined with
 * byte x+1 of the same row, 8 bytes per row, four rows per loop pass), but
 * mm7 is subtracted with unsigned saturation (psubusb) from the left-hand
 * operand before each PAVGB.
 *
 * MOVQ_BONE and PAVGB are macros that are not defined in this part of the
 * file; presumably MOVQ_BONE fills mm7 with 0x01 in every byte so that the
 * subtraction cancels the round-up of the byte average - confirm.
 * Because psubusb saturates, a left-hand byte of 0 is not lowered, which is
 * where the result differs from a true truncating average.
 *
 * h is decremented by 4 per pass and the loop ends only at exactly zero, so
 * h has to be a positive multiple of 4. Source bytes 0..8 of each row are read.
 */
386 | 59 static void DEF(put_no_rnd_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
60 { | |
0 | 61 __asm __volatile( |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size, %3 = block,
   %4 = block + line_size, %5 = 2 * line_size; eax = running byte offset. */
386 | 62 "xorl %%eax, %%eax \n\t" |
387 | 63 MOVQ_BONE(%%mm7) |
386 | 64 ".balign 16 \n\t" |
65 "1: \n\t" | |
66 "movq (%1, %%eax), %%mm0 \n\t" | |
67 "movq 1(%1, %%eax), %%mm1 \n\t" | |
68 "movq (%2, %%eax), %%mm2 \n\t" | |
69 "movq 1(%2, %%eax), %%mm3 \n\t" | |
70 "psubusb %%mm7, %%mm0 \n\t" | |
71 "psubusb %%mm7, %%mm2 \n\t" | |
72 PAVGB" %%mm1, %%mm0 \n\t" | |
73 PAVGB" %%mm3, %%mm2 \n\t" | |
74 "movq %%mm0, (%3, %%eax) \n\t" | |
75 "movq %%mm2, (%4, %%eax) \n\t" | |
76 "addl %5, %%eax \n\t" | |
77 "movq (%1, %%eax), %%mm0 \n\t" | |
78 "movq 1(%1, %%eax), %%mm1 \n\t" | |
79 "movq (%2, %%eax), %%mm2 \n\t" | |
80 "movq 1(%2, %%eax), %%mm3 \n\t" | |
81 "psubusb %%mm7, %%mm0 \n\t" | |
82 "psubusb %%mm7, %%mm2 \n\t" | |
83 PAVGB" %%mm1, %%mm0 \n\t" | |
84 PAVGB" %%mm3, %%mm2 \n\t" | |
85 "movq %%mm0, (%3, %%eax) \n\t" | |
86 "movq %%mm2, (%4, %%eax) \n\t" | |
87 "addl %5, %%eax \n\t" | |
88 "subl $4, %0 \n\t" | |
89 " jnz 1b \n\t" | |
90 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
91 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
92 "D"(line_size<<1) |
386 | 93 :"%eax", "memory"); |
0 | 94 } |
95 | |
/*
 * put_pixels_y2: store into block the PAVGB combination of every source byte
 * with the byte directly below it (same column, next row).
 *
 * Row 0 is loaded into mm0 before the loop. Each pass loads four further
 * source rows and writes four output rows of 8 bytes; the last row loaded is
 * left in mm0 so the next pass can reuse it. In total rows 0..h of the source
 * are read (one more than is written).
 *
 * h is decremented by 4 per pass and the loop ends only at exactly zero, so
 * h has to be a positive multiple of 4.
 * PAVGB is a macro that is not defined in this part of the file; presumably
 * the packed unsigned byte average of the target CPU - confirm.
 */
96 static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
97 { | |
98 __asm __volatile( | |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size,
   %3 = pixels + 2 * line_size, %4 = block, %5 = block + line_size,
   %6 = 2 * line_size; eax = running byte offset. */
386 | 99 "xorl %%eax, %%eax \n\t" |
100 "movq (%1), %%mm0 \n\t" | |
101 ".balign 16 \n\t" | |
102 "1: \n\t" | |
103 "movq (%2, %%eax), %%mm1 \n\t" | |
104 "movq (%3, %%eax), %%mm2 \n\t" | |
105 PAVGB" %%mm1, %%mm0 \n\t" | |
106 PAVGB" %%mm2, %%mm1 \n\t" | |
107 "movq %%mm0, (%4, %%eax) \n\t" | |
108 "movq %%mm1, (%5, %%eax) \n\t" | |
109 "addl %6, %%eax \n\t" | |
/* Second half: mm2 still holds the last row of the first half; the new
   bottom row goes into mm0 and is carried into the next pass. */
110 "movq (%2, %%eax), %%mm1 \n\t" | |
111 "movq (%3, %%eax), %%mm0 \n\t" | |
112 PAVGB" %%mm1, %%mm2 \n\t" | |
113 PAVGB" %%mm0, %%mm1 \n\t" | |
114 "movq %%mm2, (%4, %%eax) \n\t" | |
115 "movq %%mm1, (%5, %%eax) \n\t" | |
116 "addl %6, %%eax \n\t" | |
117 "subl $4, %0 \n\t" | |
118 " jnz 1b \n\t" | |
119 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
120 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
121 "D" (block+line_size), "g"(line_size<<1) |
386 | 122 :"%eax", "memory"); |
123 } | |
124 | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
125 /* GL: this function does incorrect rounding if overflow */ |
/*
 * put_no_rnd_pixels_y2: same data flow as put_pixels_y2 (each byte combined
 * with the byte directly below it, four output rows of 8 bytes per pass),
 * but mm7 is subtracted with unsigned saturation (psubusb) from the row held
 * in mm1 before it is used. That adjusted row takes part in both PAVGB
 * operations of its half-pass, so every output row has exactly one adjusted
 * operand.
 *
 * MOVQ_BONE and PAVGB are macros that are not defined in this part of the
 * file; presumably MOVQ_BONE fills mm7 with 0x01 in every byte so that the
 * subtraction cancels the round-up of the byte average - confirm.
 * Because psubusb saturates, a byte of 0 is not lowered, which is where the
 * result differs from a true truncating average.
 *
 * Rows 0..h of the source are read. h is decremented by 4 per pass and the
 * loop ends only at exactly zero, so h has to be a positive multiple of 4.
 */
386 | 126 static void DEF(put_no_rnd_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
127 { | |
0 | 128 __asm __volatile( |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size,
   %3 = pixels + 2 * line_size, %4 = block, %5 = block + line_size,
   %6 = 2 * line_size; eax = running byte offset. */
387 | 129 MOVQ_BONE(%%mm7) |
386 | 130 "xorl %%eax, %%eax \n\t" |
131 "movq (%1), %%mm0 \n\t" | |
132 ".balign 16 \n\t" | |
133 "1: \n\t" | |
134 "movq (%2, %%eax), %%mm1 \n\t" | |
135 "movq (%3, %%eax), %%mm2 \n\t" | |
136 "psubusb %%mm7, %%mm1 \n\t" | |
137 PAVGB" %%mm1, %%mm0 \n\t" | |
138 PAVGB" %%mm2, %%mm1 \n\t" | |
139 "movq %%mm0, (%4, %%eax) \n\t" | |
140 "movq %%mm1, (%5, %%eax) \n\t" | |
141 "addl %6, %%eax \n\t" | |
142 "movq (%2, %%eax), %%mm1 \n\t" | |
143 "movq (%3, %%eax), %%mm0 \n\t" | |
144 "psubusb %%mm7, %%mm1 \n\t" | |
145 PAVGB" %%mm1, %%mm2 \n\t" | |
146 PAVGB" %%mm0, %%mm1 \n\t" | |
147 "movq %%mm2, (%4, %%eax) \n\t" | |
148 "movq %%mm1, (%5, %%eax) \n\t" | |
149 "addl %6, %%eax \n\t" | |
150 "subl $4, %0 \n\t" | |
151 " jnz 1b \n\t" | |
152 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
153 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
154 "D" (block+line_size), "g"(line_size<<1) |
386 | 155 :"%eax", "memory"); |
0 | 156 } |
157 | |
/*
 * avg_pixels: combine each source byte with the byte already stored at the
 * same position in block (PAVGB) and write the result back to block.
 *
 * 8 bytes per row are processed. One loop pass handles four rows (two rows,
 * advance by 2*line_size, two more rows) and then subtracts 4 from h; the
 * loop only ends when h reaches exactly zero, so h has to be a positive
 * multiple of 4.
 *
 * PAVGB is a macro that is not defined in this part of the file; presumably
 * the packed unsigned byte average of the target CPU - confirm.
 * mm0 and mm2-mm4 are overwritten but are not named in the clobber list.
 */
158 static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
159 { | |
160 __asm __volatile( | |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size, %3 = block,
   %4 = block + line_size, %5 = 2 * line_size; eax = running byte offset. */
386 | 161 "xorl %%eax, %%eax \n\t" |
162 ".balign 16 \n\t" | |
163 "1: \n\t" | |
164 "movq (%1, %%eax), %%mm0 \n\t" | |
165 "movq (%2, %%eax), %%mm2 \n\t" | |
166 "movq (%3, %%eax), %%mm3 \n\t" | |
167 "movq (%4, %%eax), %%mm4 \n\t" | |
168 PAVGB" %%mm3, %%mm0 \n\t" | |
169 PAVGB" %%mm4, %%mm2 \n\t" | |
170 "movq %%mm0, (%3, %%eax) \n\t" | |
171 "movq %%mm2, (%4, %%eax) \n\t" | |
172 "addl %5, %%eax \n\t" | |
173 "movq (%1, %%eax), %%mm0 \n\t" | |
174 "movq (%2, %%eax), %%mm2 \n\t" | |
175 "movq (%3, %%eax), %%mm3 \n\t" | |
176 "movq (%4, %%eax), %%mm4 \n\t" | |
177 PAVGB" %%mm3, %%mm0 \n\t" | |
178 PAVGB" %%mm4, %%mm2 \n\t" | |
179 "movq %%mm0, (%3, %%eax) \n\t" | |
180 "movq %%mm2, (%4, %%eax) \n\t" | |
181 "addl %5, %%eax \n\t" | |
182 "subl $4, %0 \n\t" | |
183 " jnz 1b \n\t" | |
184 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
185 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
186 "D"(line_size<<1) |
386 | 187 :"%eax", "memory"); |
0 | 188 } |
189 | |
/*
 * avg_pixels_x2: first combine every source byte with its right-hand
 * neighbour (byte x and byte x+1 of the same row), then combine that result
 * with the byte already stored in block, and write it back to block. Both
 * steps use PAVGB.
 *
 * 8 bytes per row are written and source bytes 0..8 of each row are read.
 * One loop pass handles four rows and subtracts 4 from h; the loop only ends
 * when h reaches exactly zero, so h has to be a positive multiple of 4.
 *
 * PAVGB is a macro that is not defined in this part of the file; presumably
 * the packed unsigned byte average of the target CPU - confirm.
 * mm3 is reused: it first holds the shifted source row, then the block row.
 */
386 | 190 static void DEF(avg_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
0 | 191 { |
192 __asm __volatile( | |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size, %3 = block,
   %4 = block + line_size, %5 = 2 * line_size; eax = running byte offset. */
386 | 193 "xorl %%eax, %%eax \n\t" |
194 ".balign 16 \n\t" | |
195 "1: \n\t" | |
196 "movq (%1, %%eax), %%mm0 \n\t" | |
197 "movq 1(%1, %%eax), %%mm1 \n\t" | |
198 "movq (%2, %%eax), %%mm2 \n\t" | |
199 "movq 1(%2, %%eax), %%mm3 \n\t" | |
200 PAVGB" %%mm1, %%mm0 \n\t" | |
201 PAVGB" %%mm3, %%mm2 \n\t" | |
202 "movq (%3, %%eax), %%mm3 \n\t" | |
203 "movq (%4, %%eax), %%mm4 \n\t" | |
204 PAVGB" %%mm3, %%mm0 \n\t" | |
205 PAVGB" %%mm4, %%mm2 \n\t" | |
206 "movq %%mm0, (%3, %%eax) \n\t" | |
207 "movq %%mm2, (%4, %%eax) \n\t" | |
208 "addl %5, %%eax \n\t" | |
209 "movq (%1, %%eax), %%mm0 \n\t" | |
210 "movq 1(%1, %%eax), %%mm1 \n\t" | |
211 "movq (%2, %%eax), %%mm2 \n\t" | |
212 "movq 1(%2, %%eax), %%mm3 \n\t" | |
213 PAVGB" %%mm1, %%mm0 \n\t" | |
214 PAVGB" %%mm3, %%mm2 \n\t" | |
215 "movq (%3, %%eax), %%mm3 \n\t" | |
216 "movq (%4, %%eax), %%mm4 \n\t" | |
217 PAVGB" %%mm3, %%mm0 \n\t" | |
218 PAVGB" %%mm4, %%mm2 \n\t" | |
219 "movq %%mm0, (%3, %%eax) \n\t" | |
220 "movq %%mm2, (%4, %%eax) \n\t" | |
221 "addl %5, %%eax \n\t" | |
222 "subl $4, %0 \n\t" | |
223 " jnz 1b \n\t" | |
224 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
225 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
226 "D"(line_size<<1) |
386 | 227 :"%eax", "memory"); |
0 | 228 } |
229 | |
/*
 * avg_pixels_y2: first combine every source byte with the byte directly
 * below it (same column, next row), then combine that result with the byte
 * already stored in block, and write it back to block. Both steps use PAVGB.
 *
 * Row 0 is loaded into mm0 before the loop. Each pass loads four further
 * source rows and updates four block rows of 8 bytes; the last source row
 * loaded stays in mm0 for the next pass. Rows 0..h of the source are read.
 *
 * h is decremented by 4 per pass and the loop ends only at exactly zero, so
 * h has to be a positive multiple of 4.
 * PAVGB is a macro that is not defined in this part of the file; presumably
 * the packed unsigned byte average of the target CPU - confirm.
 */
386 | 230 static void DEF(avg_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
0 | 231 { |
232 __asm __volatile( | |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size,
   %3 = pixels + 2 * line_size, %4 = block, %5 = block + line_size,
   %6 = 2 * line_size; eax = running byte offset. */
386 | 233 "xorl %%eax, %%eax \n\t" |
234 "movq (%1), %%mm0 \n\t" | |
235 ".balign 16 \n\t" | |
236 "1: \n\t" | |
237 "movq (%2, %%eax), %%mm1 \n\t" | |
238 "movq (%3, %%eax), %%mm2 \n\t" | |
239 PAVGB" %%mm1, %%mm0 \n\t" | |
240 PAVGB" %%mm2, %%mm1 \n\t" | |
241 "movq (%4, %%eax), %%mm3 \n\t" | |
242 "movq (%5, %%eax), %%mm4 \n\t" | |
243 PAVGB" %%mm3, %%mm0 \n\t" | |
244 PAVGB" %%mm4, %%mm1 \n\t" | |
245 "movq %%mm0, (%4, %%eax) \n\t" | |
246 "movq %%mm1, (%5, %%eax) \n\t" | |
247 "addl %6, %%eax \n\t" | |
248 "movq (%2, %%eax), %%mm1 \n\t" | |
249 "movq (%3, %%eax), %%mm0 \n\t" | |
250 PAVGB" %%mm1, %%mm2 \n\t" | |
251 PAVGB" %%mm0, %%mm1 \n\t" | |
252 "movq (%4, %%eax), %%mm3 \n\t" | |
253 "movq (%5, %%eax), %%mm4 \n\t" | |
254 PAVGB" %%mm3, %%mm2 \n\t" | |
255 PAVGB" %%mm4, %%mm1 \n\t" | |
256 "movq %%mm2, (%4, %%eax) \n\t" | |
257 "movq %%mm1, (%5, %%eax) \n\t" | |
258 "addl %6, %%eax \n\t" | |
259 "subl $4, %0 \n\t" | |
260 " jnz 1b \n\t" | |
261 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
262 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
263 "D" (block+line_size), "g"(line_size<<1) |
386 | 264 :"%eax", "memory"); |
0 | 265 } |
266 | |
386 | 267 // Note: this is not correctly rounded, but this function is only used for B-frames, so it doesn't matter |
/*
 * avg_pixels_xy2: approximate 2x2 neighbourhood average combined with block.
 *
 * For every source row a horizontal PAVGB of byte x and byte x+1 is formed;
 * two vertically adjacent horizontal results are then combined with PAVGB,
 * and that value is combined once more with the byte already stored in
 * block before being written back. The result is therefore built from three
 * chained byte averages rather than one exact four-sample sum.
 *
 * mm7 (loaded by MOVQ_BONE, a macro not defined in this part of the file;
 * presumably 0x01 in every byte - confirm) is subtracted with unsigned
 * saturation from one source row only, in the first half of each pass; the
 * second half has no such adjustment.
 *
 * Row 0's horizontal result is prepared in mm0 before the loop, and the last
 * horizontal result of each pass is left in mm0 for the next one. Rows 0..h
 * and bytes 0..8 of each source row are read; 8 bytes per block row are
 * updated. h is decremented by 4 per pass and the loop ends only at exactly
 * zero, so h has to be a positive multiple of 4.
 */
268 static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | |
0 | 269 { |
270 __asm __volatile( | |
/* Operands: %0 = h, %1 = pixels, %2 = pixels + line_size,
   %3 = pixels + 2 * line_size, %4 = block, %5 = block + line_size,
   %6 = 2 * line_size; eax = running byte offset. */
387 | 271 MOVQ_BONE(%%mm7) |
386 | 272 "xorl %%eax, %%eax \n\t" |
273 "movq (%1), %%mm0 \n\t" | |
274 "movq 1(%1), %%mm1 \n\t" | |
275 PAVGB" %%mm1, %%mm0 \n\t" | |
276 ".balign 16 \n\t" | |
277 "1: \n\t" | |
278 "movq (%2, %%eax), %%mm1 \n\t" | |
279 "movq (%3, %%eax), %%mm2 \n\t" | |
280 "movq 1(%2, %%eax), %%mm3 \n\t" | |
281 "movq 1(%3, %%eax), %%mm4 \n\t" | |
282 "psubusb %%mm7, %%mm2 \n\t" | |
283 PAVGB" %%mm3, %%mm1 \n\t" | |
284 PAVGB" %%mm4, %%mm2 \n\t" | |
285 PAVGB" %%mm1, %%mm0 \n\t" | |
286 PAVGB" %%mm2, %%mm1 \n\t" | |
287 "movq (%4, %%eax), %%mm3 \n\t" | |
288 "movq (%5, %%eax), %%mm4 \n\t" | |
289 PAVGB" %%mm3, %%mm0 \n\t" | |
290 PAVGB" %%mm4, %%mm1 \n\t" | |
291 "movq %%mm0, (%4, %%eax) \n\t" | |
292 "movq %%mm1, (%5, %%eax) \n\t" | |
293 "addl %6, %%eax \n\t" | |
/* Second half: mm2 still holds the adjusted horizontal result from the
   first half; no psubusb is applied to the rows loaded here. */
294 "movq (%2, %%eax), %%mm1 \n\t" | |
295 "movq (%3, %%eax), %%mm0 \n\t" | |
296 "movq 1(%2, %%eax), %%mm3 \n\t" | |
297 "movq 1(%3, %%eax), %%mm4 \n\t" | |
298 PAVGB" %%mm3, %%mm1 \n\t" | |
299 PAVGB" %%mm4, %%mm0 \n\t" | |
300 PAVGB" %%mm1, %%mm2 \n\t" | |
301 PAVGB" %%mm0, %%mm1 \n\t" | |
302 "movq (%4, %%eax), %%mm3 \n\t" | |
303 "movq (%5, %%eax), %%mm4 \n\t" | |
304 PAVGB" %%mm3, %%mm2 \n\t" | |
305 PAVGB" %%mm4, %%mm1 \n\t" | |
306 "movq %%mm2, (%4, %%eax) \n\t" | |
307 "movq %%mm1, (%5, %%eax) \n\t" | |
308 "addl %6, %%eax \n\t" | |
309 "subl $4, %0 \n\t" | |
310 " jnz 1b \n\t" | |
311 :"+g"(h) | |
389
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
312 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block), |
f874493a1970
tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests)
glantau
parents:
387
diff
changeset
|
313 "D" (block+line_size), "g"(line_size<<1) |
386 | 314 :"%eax", "memory"); |
0 | 315 } |
316 | |
386 | 317 // Note: the sub* functions are not used |
318 | |
/*
 * sub_pixels_x2: subtract a horizontally interpolated source row from block.
 *
 * For each of h rows: 8 source bytes at pix+1 are combined (PAVGB) with the
 * 8 bytes at pix, the result is widened from bytes to 16-bit words by
 * interleaving with the zeroed mm7, and the words are subtracted with signed
 * saturation (psubsw) from the 16 bytes stored at p, which are then written
 * back. pix advances by line_size and p by 8 elements per row.
 *
 * The 16 bytes touched per row match 8 elements only if DCTELEM is 16 bits
 * wide - assumed here, confirm against its typedef.
 * The loop is a do/while on --h, so the body always runs at least once and
 * h is expected to be >= 1.
 * mm7 is zeroed in a separate asm statement; the loop body relies on it
 * still being zero. PAVGB is a macro not defined in this part of the file.
 */
0 | 319 static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) |
320 { | |
321 DCTELEM *p; | |
322 const UINT8 *pix; | |
323 p = block; | |
324 pix = pixels; | |
325 __asm __volatile( | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
8
diff
changeset
|
326 "pxor %%mm7, %%mm7":); |
0 | 327 do { |
328 __asm __volatile( | |
/* %0 = *p (read and written), %1 = *pix. The literal prefixes "1" and "8"
   are glued onto the memory operand text to form a displaced address. */
329 "movq 1%1, %%mm2\n\t" | |
330 "movq %0, %%mm0\n\t" | |
331 PAVGB" %1, %%mm2\n\t" | |
332 "movq 8%0, %%mm1\n\t" | |
333 "movq %%mm2, %%mm3\n\t" | |
334 "punpcklbw %%mm7, %%mm2\n\t" | |
335 "punpckhbw %%mm7, %%mm3\n\t" | |
336 "psubsw %%mm2, %%mm0\n\t" | |
337 "psubsw %%mm3, %%mm1\n\t" | |
338 "movq %%mm0, %0\n\t" | |
339 "movq %%mm1, 8%0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
8
diff
changeset
|
340 :"+m"(*p) |
0 | 341 :"m"(*pix) |
342 :"memory"); | |
343 pix += line_size; | |
344 p += 8; | |
345 } while (--h); | |
346 } | |
347 | |
/*
 * sub_pixels_y2: subtract a vertically interpolated source row from block.
 *
 * For each of h rows: 8 source bytes at pix+line_size are combined (PAVGB)
 * with the 8 bytes at pix, the result is widened from bytes to 16-bit words
 * by interleaving with the zeroed mm7, and the words are subtracted with
 * signed saturation (psubsw) from the 16 bytes stored at p, which are then
 * written back. pix advances by line_size and p by 8 elements per row, so
 * source rows 0..h are read.
 *
 * The 16 bytes touched per row match 8 elements only if DCTELEM is 16 bits
 * wide - assumed here, confirm against its typedef.
 * The loop is a do/while on --h, so the body always runs at least once and
 * h is expected to be >= 1.
 * mm7 is zeroed in a separate asm statement; the loop body relies on it
 * still being zero. PAVGB is a macro not defined in this part of the file.
 */
348 static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
349 { | |
350 DCTELEM *p; | |
351 const UINT8 *pix; | |
352 p = block; | |
353 pix = pixels; | |
354 __asm __volatile( | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
8
diff
changeset
|
355 "pxor %%mm7, %%mm7":); |
0 | 356 do { |
357 __asm __volatile( | |
/* %0 = *p (read and written), %1 = *pix, %2 = *(pix + line_size). The
   literal prefix "8" is glued onto %0 to address the second 8 bytes. */
358 "movq %2, %%mm2\n\t" | |
359 "movq %0, %%mm0\n\t" | |
360 PAVGB" %1, %%mm2\n\t" | |
361 "movq 8%0, %%mm1\n\t" | |
362 "movq %%mm2, %%mm3\n\t" | |
363 "punpcklbw %%mm7, %%mm2\n\t" | |
364 "punpckhbw %%mm7, %%mm3\n\t" | |
365 "psubsw %%mm2, %%mm0\n\t" | |
366 "psubsw %%mm3, %%mm1\n\t" | |
367 "movq %%mm0, %0\n\t" | |
368 "movq %%mm1, 8%0\n\t" | |
151
ae0516eadae2
fixed gcc-3.0.x compilation (by Michael Niedermayer)
nickols_k
parents:
8
diff
changeset
|
369 :"+m"(*p) |
0 | 370 :"m"(*pix), "m"(*(pix+line_size)) |
371 :"memory"); | |
372 pix += line_size; | |
373 p += 8; | |
374 } while (--h); | |
375 } | |
376 |