Mercurial > libavcodec.hg
comparison i386/dsputil_mmx_avg.h @ 1064:b32afefe7d33 libavcodec
* UINTX -> uintx_t INTX -> intx_t
author | kabi |
---|---|
date | Tue, 11 Feb 2003 16:35:48 +0000 |
parents | 13aec7e50c52 |
children | 07a484280a82 |
comparison
equal
deleted
inserted
replaced
1063:fdeac9642346 | 1064:b32afefe7d33 |
---|---|
23 */ | 23 */ |
24 | 24 |
25 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm | 25 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm |
26 clobber bug - now it will work with 2.95.2 and also with -fPIC | 26 clobber bug - now it will work with 2.95.2 and also with -fPIC |
27 */ | 27 */ |
28 static void DEF(put_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 28 static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
29 { | 29 { |
30 __asm __volatile( | 30 __asm __volatile( |
31 "lea (%3, %3), %%eax \n\t" | 31 "lea (%3, %3), %%eax \n\t" |
32 "1: \n\t" | 32 "1: \n\t" |
33 "movq (%1), %%mm0 \n\t" | 33 "movq (%1), %%mm0 \n\t" |
83 :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) | 83 :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) |
84 :"r"(src1Stride), "r"(dstStride) | 84 :"r"(src1Stride), "r"(dstStride) |
85 :"memory"); | 85 :"memory"); |
86 } | 86 } |
87 | 87 |
88 static void DEF(put_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 88 static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
89 { | 89 { |
90 __asm __volatile( | 90 __asm __volatile( |
91 "lea (%3, %3), %%eax \n\t" | 91 "lea (%3, %3), %%eax \n\t" |
92 "1: \n\t" | 92 "1: \n\t" |
93 "movq (%1), %%mm0 \n\t" | 93 "movq (%1), %%mm0 \n\t" |
152 :"r"(src1Stride), "r"(dstStride) | 152 :"r"(src1Stride), "r"(dstStride) |
153 :"memory"); | 153 :"memory"); |
154 } | 154 } |
155 | 155 |
156 /* GL: this function does incorrect rounding if overflow */ | 156 /* GL: this function does incorrect rounding if overflow */ |
157 static void DEF(put_no_rnd_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 157 static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
158 { | 158 { |
159 MOVQ_BONE(mm6); | 159 MOVQ_BONE(mm6); |
160 __asm __volatile( | 160 __asm __volatile( |
161 "lea (%3, %3), %%eax \n\t" | 161 "lea (%3, %3), %%eax \n\t" |
162 "1: \n\t" | 162 "1: \n\t" |
189 :"+g"(h), "+S"(pixels), "+D"(block) | 189 :"+g"(h), "+S"(pixels), "+D"(block) |
190 :"r" (line_size) | 190 :"r" (line_size) |
191 :"%eax", "memory"); | 191 :"%eax", "memory"); |
192 } | 192 } |
193 | 193 |
194 static void DEF(put_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 194 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
195 { | 195 { |
196 __asm __volatile( | 196 __asm __volatile( |
197 "lea (%3, %3), %%eax \n\t" | 197 "lea (%3, %3), %%eax \n\t" |
198 "movq (%1), %%mm0 \n\t" | 198 "movq (%1), %%mm0 \n\t" |
199 "subl %3, %2 \n\t" | 199 "subl %3, %2 \n\t" |
220 :"r" (line_size) | 220 :"r" (line_size) |
221 :"%eax", "memory"); | 221 :"%eax", "memory"); |
222 } | 222 } |
223 | 223 |
224 /* GL: this function does incorrect rounding if overflow */ | 224 /* GL: this function does incorrect rounding if overflow */ |
225 static void DEF(put_no_rnd_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 225 static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
226 { | 226 { |
227 MOVQ_BONE(mm6); | 227 MOVQ_BONE(mm6); |
228 __asm __volatile( | 228 __asm __volatile( |
229 "lea (%3, %3), %%eax \n\t" | 229 "lea (%3, %3), %%eax \n\t" |
230 "movq (%1), %%mm0 \n\t" | 230 "movq (%1), %%mm0 \n\t" |
253 :"+g"(h), "+S"(pixels), "+D" (block) | 253 :"+g"(h), "+S"(pixels), "+D" (block) |
254 :"r" (line_size) | 254 :"r" (line_size) |
255 :"%eax", "memory"); | 255 :"%eax", "memory"); |
256 } | 256 } |
257 | 257 |
258 static void DEF(avg_pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 258 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
259 { | 259 { |
260 __asm __volatile( | 260 __asm __volatile( |
261 "lea (%3, %3), %%eax \n\t" | 261 "lea (%3, %3), %%eax \n\t" |
262 "1: \n\t" | 262 "1: \n\t" |
263 "movq (%2), %%mm0 \n\t" | 263 "movq (%2), %%mm0 \n\t" |
281 :"+g"(h), "+S"(pixels), "+D"(block) | 281 :"+g"(h), "+S"(pixels), "+D"(block) |
282 :"r" (line_size) | 282 :"r" (line_size) |
283 :"%eax", "memory"); | 283 :"%eax", "memory"); |
284 } | 284 } |
285 | 285 |
286 static void DEF(avg_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 286 static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
287 { | 287 { |
288 __asm __volatile( | 288 __asm __volatile( |
289 "lea (%3, %3), %%eax \n\t" | 289 "lea (%3, %3), %%eax \n\t" |
290 "1: \n\t" | 290 "1: \n\t" |
291 "movq (%1), %%mm0 \n\t" | 291 "movq (%1), %%mm0 \n\t" |
313 :"+g"(h), "+S"(pixels), "+D"(block) | 313 :"+g"(h), "+S"(pixels), "+D"(block) |
314 :"r" (line_size) | 314 :"r" (line_size) |
315 :"%eax", "memory"); | 315 :"%eax", "memory"); |
316 } | 316 } |
317 | 317 |
318 static void DEF(avg_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 318 static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
319 { | 319 { |
320 __asm __volatile( | 320 __asm __volatile( |
321 "lea (%3, %3), %%eax \n\t" | 321 "lea (%3, %3), %%eax \n\t" |
322 "movq (%1), %%mm0 \n\t" | 322 "movq (%1), %%mm0 \n\t" |
323 "subl %3, %2 \n\t" | 323 "subl %3, %2 \n\t" |
352 :"r" (line_size) | 352 :"r" (line_size) |
353 :"%eax", "memory"); | 353 :"%eax", "memory"); |
354 } | 354 } |
355 | 355 |
356 // Note this is not correctly rounded, but this function is only used for b frames so it doesn't matter | 356 // Note this is not correctly rounded, but this function is only used for b frames so it doesn't matter |
357 static void DEF(avg_pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 357 static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
358 { | 358 { |
359 MOVQ_BONE(mm6); | 359 MOVQ_BONE(mm6); |
360 __asm __volatile( | 360 __asm __volatile( |
361 "lea (%3, %3), %%eax \n\t" | 361 "lea (%3, %3), %%eax \n\t" |
362 "movq (%1), %%mm0 \n\t" | 362 "movq (%1), %%mm0 \n\t" |
394 :"r" (line_size) | 394 :"r" (line_size) |
395 :"%eax", "memory"); | 395 :"%eax", "memory"); |
396 } | 396 } |
397 | 397 |
398 //FIXME the following could be optimized too ... | 398 //FIXME the following could be optimized too ... |
399 static void DEF(put_no_rnd_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 399 static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
400 DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); | 400 DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); |
401 DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); | 401 DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); |
402 } | 402 } |
403 static void DEF(put_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 403 static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
404 DEF(put_pixels8_y2)(block , pixels , line_size, h); | 404 DEF(put_pixels8_y2)(block , pixels , line_size, h); |
405 DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h); | 405 DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h); |
406 } | 406 } |
407 static void DEF(put_no_rnd_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 407 static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
408 DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h); | 408 DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h); |
409 DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h); | 409 DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h); |
410 } | 410 } |
411 static void DEF(avg_pixels16)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 411 static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
412 DEF(avg_pixels8)(block , pixels , line_size, h); | 412 DEF(avg_pixels8)(block , pixels , line_size, h); |
413 DEF(avg_pixels8)(block+8, pixels+8, line_size, h); | 413 DEF(avg_pixels8)(block+8, pixels+8, line_size, h); |
414 } | 414 } |
415 static void DEF(avg_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 415 static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
416 DEF(avg_pixels8_x2)(block , pixels , line_size, h); | 416 DEF(avg_pixels8_x2)(block , pixels , line_size, h); |
417 DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h); | 417 DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h); |
418 } | 418 } |
419 static void DEF(avg_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 419 static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
420 DEF(avg_pixels8_y2)(block , pixels , line_size, h); | 420 DEF(avg_pixels8_y2)(block , pixels , line_size, h); |
421 DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h); | 421 DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h); |
422 } | 422 } |
423 static void DEF(avg_pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 423 static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
424 DEF(avg_pixels8_xy2)(block , pixels , line_size, h); | 424 DEF(avg_pixels8_xy2)(block , pixels , line_size, h); |
425 DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h); | 425 DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h); |
426 } | 426 } |
427 | 427 |