comparison i386/dsputil_mmx_avg.h @ 1064:b32afefe7d33 libavcodec

* UINTX -> uintx_t INTX -> intx_t
author kabi
date Tue, 11 Feb 2003 16:35:48 +0000
parents 13aec7e50c52
children 07a484280a82
comparison
equal deleted inserted replaced
1063:fdeac9642346 1064:b32afefe7d33
23 */ 23 */
24 24
25 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm 25 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
26 clobber bug - now it will work with 2.95.2 and also with -fPIC 26 clobber bug - now it will work with 2.95.2 and also with -fPIC
27 */ 27 */
28 static void DEF(put_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 28 static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
29 { 29 {
30 __asm __volatile( 30 __asm __volatile(
31 "lea (%3, %3), %%eax \n\t" 31 "lea (%3, %3), %%eax \n\t"
32 "1: \n\t" 32 "1: \n\t"
33 "movq (%1), %%mm0 \n\t" 33 "movq (%1), %%mm0 \n\t"
83 :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) 83 :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
84 :"r"(src1Stride), "r"(dstStride) 84 :"r"(src1Stride), "r"(dstStride)
85 :"memory"); 85 :"memory");
86 } 86 }
87 87
88 static void DEF(put_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 88 static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
89 { 89 {
90 __asm __volatile( 90 __asm __volatile(
91 "lea (%3, %3), %%eax \n\t" 91 "lea (%3, %3), %%eax \n\t"
92 "1: \n\t" 92 "1: \n\t"
93 "movq (%1), %%mm0 \n\t" 93 "movq (%1), %%mm0 \n\t"
152 :"r"(src1Stride), "r"(dstStride) 152 :"r"(src1Stride), "r"(dstStride)
153 :"memory"); 153 :"memory");
154 } 154 }
155 155
156 /* GL: this function rounds incorrectly on overflow */ 156 /* GL: this function rounds incorrectly on overflow */
157 static void DEF(put_no_rnd_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 157 static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
158 { 158 {
159 MOVQ_BONE(mm6); 159 MOVQ_BONE(mm6);
160 __asm __volatile( 160 __asm __volatile(
161 "lea (%3, %3), %%eax \n\t" 161 "lea (%3, %3), %%eax \n\t"
162 "1: \n\t" 162 "1: \n\t"
189 :"+g"(h), "+S"(pixels), "+D"(block) 189 :"+g"(h), "+S"(pixels), "+D"(block)
190 :"r" (line_size) 190 :"r" (line_size)
191 :"%eax", "memory"); 191 :"%eax", "memory");
192 } 192 }
193 193
194 static void DEF(put_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 194 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
195 { 195 {
196 __asm __volatile( 196 __asm __volatile(
197 "lea (%3, %3), %%eax \n\t" 197 "lea (%3, %3), %%eax \n\t"
198 "movq (%1), %%mm0 \n\t" 198 "movq (%1), %%mm0 \n\t"
199 "subl %3, %2 \n\t" 199 "subl %3, %2 \n\t"
220 :"r" (line_size) 220 :"r" (line_size)
221 :"%eax", "memory"); 221 :"%eax", "memory");
222 } 222 }
223 223
224 /* GL: this function rounds incorrectly on overflow */ 224 /* GL: this function rounds incorrectly on overflow */
225 static void DEF(put_no_rnd_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 225 static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
226 { 226 {
227 MOVQ_BONE(mm6); 227 MOVQ_BONE(mm6);
228 __asm __volatile( 228 __asm __volatile(
229 "lea (%3, %3), %%eax \n\t" 229 "lea (%3, %3), %%eax \n\t"
230 "movq (%1), %%mm0 \n\t" 230 "movq (%1), %%mm0 \n\t"
253 :"+g"(h), "+S"(pixels), "+D" (block) 253 :"+g"(h), "+S"(pixels), "+D" (block)
254 :"r" (line_size) 254 :"r" (line_size)
255 :"%eax", "memory"); 255 :"%eax", "memory");
256 } 256 }
257 257
258 static void DEF(avg_pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 258 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
259 { 259 {
260 __asm __volatile( 260 __asm __volatile(
261 "lea (%3, %3), %%eax \n\t" 261 "lea (%3, %3), %%eax \n\t"
262 "1: \n\t" 262 "1: \n\t"
263 "movq (%2), %%mm0 \n\t" 263 "movq (%2), %%mm0 \n\t"
281 :"+g"(h), "+S"(pixels), "+D"(block) 281 :"+g"(h), "+S"(pixels), "+D"(block)
282 :"r" (line_size) 282 :"r" (line_size)
283 :"%eax", "memory"); 283 :"%eax", "memory");
284 } 284 }
285 285
286 static void DEF(avg_pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 286 static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
287 { 287 {
288 __asm __volatile( 288 __asm __volatile(
289 "lea (%3, %3), %%eax \n\t" 289 "lea (%3, %3), %%eax \n\t"
290 "1: \n\t" 290 "1: \n\t"
291 "movq (%1), %%mm0 \n\t" 291 "movq (%1), %%mm0 \n\t"
313 :"+g"(h), "+S"(pixels), "+D"(block) 313 :"+g"(h), "+S"(pixels), "+D"(block)
314 :"r" (line_size) 314 :"r" (line_size)
315 :"%eax", "memory"); 315 :"%eax", "memory");
316 } 316 }
317 317
318 static void DEF(avg_pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 318 static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
319 { 319 {
320 __asm __volatile( 320 __asm __volatile(
321 "lea (%3, %3), %%eax \n\t" 321 "lea (%3, %3), %%eax \n\t"
322 "movq (%1), %%mm0 \n\t" 322 "movq (%1), %%mm0 \n\t"
323 "subl %3, %2 \n\t" 323 "subl %3, %2 \n\t"
352 :"r" (line_size) 352 :"r" (line_size)
353 :"%eax", "memory"); 353 :"%eax", "memory");
354 } 354 }
355 355
356 // Note: this is not correctly rounded, but this function is only used for B-frames, so it doesn't matter 356 // Note: this is not correctly rounded, but this function is only used for B-frames, so it doesn't matter
357 static void DEF(avg_pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 357 static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
358 { 358 {
359 MOVQ_BONE(mm6); 359 MOVQ_BONE(mm6);
360 __asm __volatile( 360 __asm __volatile(
361 "lea (%3, %3), %%eax \n\t" 361 "lea (%3, %3), %%eax \n\t"
362 "movq (%1), %%mm0 \n\t" 362 "movq (%1), %%mm0 \n\t"
394 :"r" (line_size) 394 :"r" (line_size)
395 :"%eax", "memory"); 395 :"%eax", "memory");
396 } 396 }
397 397
398 //FIXME the following could be optimized too ... 398 //FIXME the following could be optimized too ...
399 static void DEF(put_no_rnd_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 399 static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
400 DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); 400 DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
401 DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); 401 DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
402 } 402 }
403 static void DEF(put_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 403 static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
404 DEF(put_pixels8_y2)(block , pixels , line_size, h); 404 DEF(put_pixels8_y2)(block , pixels , line_size, h);
405 DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h); 405 DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
406 } 406 }
407 static void DEF(put_no_rnd_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 407 static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
408 DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h); 408 DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
409 DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h); 409 DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
410 } 410 }
411 static void DEF(avg_pixels16)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 411 static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
412 DEF(avg_pixels8)(block , pixels , line_size, h); 412 DEF(avg_pixels8)(block , pixels , line_size, h);
413 DEF(avg_pixels8)(block+8, pixels+8, line_size, h); 413 DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
414 } 414 }
415 static void DEF(avg_pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 415 static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
416 DEF(avg_pixels8_x2)(block , pixels , line_size, h); 416 DEF(avg_pixels8_x2)(block , pixels , line_size, h);
417 DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h); 417 DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
418 } 418 }
419 static void DEF(avg_pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 419 static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
420 DEF(avg_pixels8_y2)(block , pixels , line_size, h); 420 DEF(avg_pixels8_y2)(block , pixels , line_size, h);
421 DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h); 421 DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
422 } 422 }
423 static void DEF(avg_pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 423 static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
424 DEF(avg_pixels8_xy2)(block , pixels , line_size, h); 424 DEF(avg_pixels8_xy2)(block , pixels , line_size, h);
425 DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h); 425 DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
426 } 426 }
427 427