comparison i386/dsputil_mmx_avg.h @ 2209:c4a476971abc libavcodec

h264 luma motion compensation in mmx2/3dnow
author michael
date Tue, 07 Sep 2004 01:48:45 +0000
parents 22b768f1261a
children 15cfba1b97b5
comparison
equal deleted inserted replaced
2208:9ca8a88a8a70 2209:c4a476971abc
51 :"+g"(h), "+S"(pixels), "+D"(block) 51 :"+g"(h), "+S"(pixels), "+D"(block)
52 :"r" (line_size) 52 :"r" (line_size)
53 :"%eax", "memory"); 53 :"%eax", "memory");
54 } 54 }
55 55
/*
 * put_pixels4_l2: for each row, combine 4 bytes from src1 with 4 bytes from
 * src2 using PAVGB and store the 4-byte result to dst.
 *
 * PAVGB is a macro defined outside this view (presumably the packed byte
 * average, pavgb on MMX2 / pavgusb on 3DNow! -- confirm in the including file).
 *
 * dst        destination, advanced by dstStride after every row
 * src1       first source, advanced by src1Stride after every row
 * src2       second source, read as consecutive 4-byte rows (offsets 0, 4, 8,
 *            12, then advanced by 16), i.e. it has no stride parameter
 * h          number of rows
 *
 * Asm operands: %0 = h, %1 = src1 (eax), %2 = src2 (ecx), %3 = dst (edx),
 *               %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * NOTE(review): the main loop handles 4 rows per pass and only exits when the
 * counter reaches exactly 0, so h looks like it must be 4k or 4k+1 with k >= 1;
 * any other value (including h == 1) would not terminate -- confirm callers.
 * NOTE(review): mm0 and mm1 are modified but not listed as clobbers.
 * NOTE(review): if PAVGB has a 64-bit memory operand, the reads through %2
 * touch 4 bytes beyond the 4 that end up stored -- confirm this is safe.
 */
56 static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
57 {
58 __asm __volatile(
/* If h is odd, process a single row first so the loop below can run in groups of 4. */
59 "testl $1, %0 \n\t"
60 " jz 1f \n\t"
61 "movd (%1), %%mm0 \n\t"
62 "movd (%2), %%mm1 \n\t"
63 "addl %4, %1 \n\t"
64 "addl $4, %2 \n\t"
65 PAVGB" %%mm1, %%mm0 \n\t"
66 "movd %%mm0, (%3) \n\t"
67 "addl %5, %3 \n\t"
68 "decl %0 \n\t"
/* Main loop: 4 rows per iteration, handled as two pairs (mm0/mm1). */
69 "1: \n\t"
/* Rows 0 and 1: load from src1, combine with src2 bytes 0..3 and 4..7, store. */
70 "movd (%1), %%mm0 \n\t"
71 "addl %4, %1 \n\t"
72 "movd (%1), %%mm1 \n\t"
73 "addl %4, %1 \n\t"
74 PAVGB" (%2), %%mm0 \n\t"
75 PAVGB" 4(%2), %%mm1 \n\t"
76 "movd %%mm0, (%3) \n\t"
77 "addl %5, %3 \n\t"
78 "movd %%mm1, (%3) \n\t"
79 "addl %5, %3 \n\t"
/* Rows 2 and 3: same pattern with src2 bytes 8..11 and 12..15. */
80 "movd (%1), %%mm0 \n\t"
81 "addl %4, %1 \n\t"
82 "movd (%1), %%mm1 \n\t"
83 "addl %4, %1 \n\t"
84 PAVGB" 8(%2), %%mm0 \n\t"
85 PAVGB" 12(%2), %%mm1 \n\t"
86 "movd %%mm0, (%3) \n\t"
87 "addl %5, %3 \n\t"
88 "movd %%mm1, (%3) \n\t"
89 "addl %5, %3 \n\t"
/* Step src2 past the 4 rows just consumed and count them off h. */
90 "addl $16, %2 \n\t"
91 "subl $4, %0 \n\t"
92 "jnz 1b \n\t"
/* Under PIC the row counter lives in memory instead of ebx (presumably because ebx is reserved for the GOT pointer -- confirm). */
93 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
94 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
95 #else
96 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
97 #endif
98 :"S"(src1Stride), "D"(dstStride)
99 :"memory");
100 }
101
102
56 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) 103 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
57 { 104 {
58 __asm __volatile( 105 __asm __volatile(
59 "testl $1, %0 \n\t" 106 "testl $1, %0 \n\t"
60 " jz 1f \n\t" 107 " jz 1f \n\t"
171 /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) 218 /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
172 :"r"(src1Stride), "r"(dstStride) 219 :"r"(src1Stride), "r"(dstStride)
173 :"memory");*/ 220 :"memory");*/
174 } 221 }
175 222
/*
 * avg_pixels4_l2: for each row, combine 4 bytes from src1 with 4 bytes from
 * src2 using PAVGB, then combine that result with the 4 bytes already at dst
 * (a second PAVGB against (%3)) and store it back to dst.
 *
 * PAVGB is a macro defined outside this view (presumably the packed byte
 * average, pavgb on MMX2 / pavgusb on 3DNow! -- confirm in the including file).
 *
 * dst        destination, read and written; advanced by dstStride per row
 * src1       first source, advanced by src1Stride after every row
 * src2       second source, read as consecutive 4-byte rows (offsets 0, 4, 8,
 *            12, then advanced by 16), i.e. it has no stride parameter
 * h          number of rows
 *
 * Asm operands: %0 = h, %1 = src1 (eax), %2 = src2 (ecx), %3 = dst (edx),
 *               %4 = src1Stride (esi), %5 = dstStride (edi).
 *
 * NOTE(review): the main loop handles 4 rows per pass and only exits when the
 * counter reaches exactly 0, so h looks like it must be 4k or 4k+1 with k >= 1;
 * any other value (including h == 1) would not terminate -- confirm callers.
 * NOTE(review): mm0 and mm1 are modified but not listed as clobbers.
 * NOTE(review): if PAVGB has a 64-bit memory operand, the reads through %2
 * and %3 touch 4 bytes beyond the 4 that end up stored -- confirm this is safe.
 */
223 static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
224 {
225 __asm __volatile(
/* If h is odd, process a single row first so the loop below can run in groups of 4. */
226 "testl $1, %0 \n\t"
227 " jz 1f \n\t"
228 "movd (%1), %%mm0 \n\t"
229 "movd (%2), %%mm1 \n\t"
230 "addl %4, %1 \n\t"
231 "addl $4, %2 \n\t"
232 PAVGB" %%mm1, %%mm0 \n\t"
/* Blend the src1/src2 result with the existing dst contents before storing. */
233 PAVGB" (%3), %%mm0 \n\t"
234 "movd %%mm0, (%3) \n\t"
235 "addl %5, %3 \n\t"
236 "decl %0 \n\t"
/* Main loop: 4 rows per iteration, handled as two pairs (mm0/mm1). */
237 "1: \n\t"
/* Rows 0 and 1: load from src1, combine with src2 bytes 0..3 and 4..7. */
238 "movd (%1), %%mm0 \n\t"
239 "addl %4, %1 \n\t"
240 "movd (%1), %%mm1 \n\t"
241 "addl %4, %1 \n\t"
242 PAVGB" (%2), %%mm0 \n\t"
243 PAVGB" 4(%2), %%mm1 \n\t"
/* Each row is blended with dst only after dst has been advanced to that row. */
244 PAVGB" (%3), %%mm0 \n\t"
245 "movd %%mm0, (%3) \n\t"
246 "addl %5, %3 \n\t"
247 PAVGB" (%3), %%mm1 \n\t"
248 "movd %%mm1, (%3) \n\t"
249 "addl %5, %3 \n\t"
/* Rows 2 and 3: same pattern with src2 bytes 8..11 and 12..15. */
250 "movd (%1), %%mm0 \n\t"
251 "addl %4, %1 \n\t"
252 "movd (%1), %%mm1 \n\t"
253 "addl %4, %1 \n\t"
254 PAVGB" 8(%2), %%mm0 \n\t"
255 PAVGB" 12(%2), %%mm1 \n\t"
256 PAVGB" (%3), %%mm0 \n\t"
257 "movd %%mm0, (%3) \n\t"
258 "addl %5, %3 \n\t"
259 PAVGB" (%3), %%mm1 \n\t"
260 "movd %%mm1, (%3) \n\t"
261 "addl %5, %3 \n\t"
/* Step src2 past the 4 rows just consumed and count them off h. */
262 "addl $16, %2 \n\t"
263 "subl $4, %0 \n\t"
264 "jnz 1b \n\t"
/* Under PIC the row counter lives in memory instead of ebx (presumably because ebx is reserved for the GOT pointer -- confirm). */
265 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
266 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
267 #else
268 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
269 #endif
270 :"S"(src1Stride), "D"(dstStride)
271 :"memory");
272 }
273
274
176 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) 275 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
177 { 276 {
178 __asm __volatile( 277 __asm __volatile(
179 "testl $1, %0 \n\t" 278 "testl $1, %0 \n\t"
180 " jz 1f \n\t" 279 " jz 1f \n\t"