Mercurial > libavcodec.hg
comparison i386/dsputil_mmx_avg.h @ 2209:c4a476971abc libavcodec
h264 luma motion compensation in mmx2/3dnow
field | value |
---|---|
author | michael |
date | Tue, 07 Sep 2004 01:48:45 +0000 |
parents | 22b768f1261a |
children | 15cfba1b97b5 |
comparison
legend: equal | deleted | inserted | replaced
2208:9ca8a88a8a70 | 2209:c4a476971abc |
---|---|
51 :"+g"(h), "+S"(pixels), "+D"(block) | 51 :"+g"(h), "+S"(pixels), "+D"(block) |
52 :"r" (line_size) | 52 :"r" (line_size) |
53 :"%eax", "memory"); | 53 :"%eax", "memory"); |
54 } | 54 } |
55 | 55 |
56 static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | |
57 { | |
58 __asm __volatile( | |
59 "testl $1, %0 \n\t" | |
60 " jz 1f \n\t" | |
61 "movd (%1), %%mm0 \n\t" | |
62 "movd (%2), %%mm1 \n\t" | |
63 "addl %4, %1 \n\t" | |
64 "addl $4, %2 \n\t" | |
65 PAVGB" %%mm1, %%mm0 \n\t" | |
66 "movd %%mm0, (%3) \n\t" | |
67 "addl %5, %3 \n\t" | |
68 "decl %0 \n\t" | |
69 "1: \n\t" | |
70 "movd (%1), %%mm0 \n\t" | |
71 "addl %4, %1 \n\t" | |
72 "movd (%1), %%mm1 \n\t" | |
73 "addl %4, %1 \n\t" | |
74 PAVGB" (%2), %%mm0 \n\t" | |
75 PAVGB" 4(%2), %%mm1 \n\t" | |
76 "movd %%mm0, (%3) \n\t" | |
77 "addl %5, %3 \n\t" | |
78 "movd %%mm1, (%3) \n\t" | |
79 "addl %5, %3 \n\t" | |
80 "movd (%1), %%mm0 \n\t" | |
81 "addl %4, %1 \n\t" | |
82 "movd (%1), %%mm1 \n\t" | |
83 "addl %4, %1 \n\t" | |
84 PAVGB" 8(%2), %%mm0 \n\t" | |
85 PAVGB" 12(%2), %%mm1 \n\t" | |
86 "movd %%mm0, (%3) \n\t" | |
87 "addl %5, %3 \n\t" | |
88 "movd %%mm1, (%3) \n\t" | |
89 "addl %5, %3 \n\t" | |
90 "addl $16, %2 \n\t" | |
91 "subl $4, %0 \n\t" | |
92 "jnz 1b \n\t" | |
93 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used | |
94 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
95 #else | |
96 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
97 #endif | |
98 :"S"(src1Stride), "D"(dstStride) | |
99 :"memory"); | |
100 } | |
101 | |
102 | |
56 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | 103 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
57 { | 104 { |
58 __asm __volatile( | 105 __asm __volatile( |
59 "testl $1, %0 \n\t" | 106 "testl $1, %0 \n\t" |
60 " jz 1f \n\t" | 107 " jz 1f \n\t" |
171 /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) | 218 /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) |
172 :"r"(src1Stride), "r"(dstStride) | 219 :"r"(src1Stride), "r"(dstStride) |
173 :"memory");*/ | 220 :"memory");*/ |
174 } | 221 } |
175 | 222 |
223 static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | |
224 { | |
225 __asm __volatile( | |
226 "testl $1, %0 \n\t" | |
227 " jz 1f \n\t" | |
228 "movd (%1), %%mm0 \n\t" | |
229 "movd (%2), %%mm1 \n\t" | |
230 "addl %4, %1 \n\t" | |
231 "addl $4, %2 \n\t" | |
232 PAVGB" %%mm1, %%mm0 \n\t" | |
233 PAVGB" (%3), %%mm0 \n\t" | |
234 "movd %%mm0, (%3) \n\t" | |
235 "addl %5, %3 \n\t" | |
236 "decl %0 \n\t" | |
237 "1: \n\t" | |
238 "movd (%1), %%mm0 \n\t" | |
239 "addl %4, %1 \n\t" | |
240 "movd (%1), %%mm1 \n\t" | |
241 "addl %4, %1 \n\t" | |
242 PAVGB" (%2), %%mm0 \n\t" | |
243 PAVGB" 4(%2), %%mm1 \n\t" | |
244 PAVGB" (%3), %%mm0 \n\t" | |
245 "movd %%mm0, (%3) \n\t" | |
246 "addl %5, %3 \n\t" | |
247 PAVGB" (%3), %%mm1 \n\t" | |
248 "movd %%mm1, (%3) \n\t" | |
249 "addl %5, %3 \n\t" | |
250 "movd (%1), %%mm0 \n\t" | |
251 "addl %4, %1 \n\t" | |
252 "movd (%1), %%mm1 \n\t" | |
253 "addl %4, %1 \n\t" | |
254 PAVGB" 8(%2), %%mm0 \n\t" | |
255 PAVGB" 12(%2), %%mm1 \n\t" | |
256 PAVGB" (%3), %%mm0 \n\t" | |
257 "movd %%mm0, (%3) \n\t" | |
258 "addl %5, %3 \n\t" | |
259 PAVGB" (%3), %%mm1 \n\t" | |
260 "movd %%mm1, (%3) \n\t" | |
261 "addl %5, %3 \n\t" | |
262 "addl $16, %2 \n\t" | |
263 "subl $4, %0 \n\t" | |
264 "jnz 1b \n\t" | |
265 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used | |
266 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
267 #else | |
268 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
269 #endif | |
270 :"S"(src1Stride), "D"(dstStride) | |
271 :"memory"); | |
272 } | |
273 | |
274 | |
176 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | 275 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) |
177 { | 276 { |
178 __asm __volatile( | 277 __asm __volatile( |
179 "testl $1, %0 \n\t" | 278 "testl $1, %0 \n\t" |
180 " jz 1f \n\t" | 279 " jz 1f \n\t" |