comparison dsputil.c @ 294:944632089814 libavcodec

4MV motion estimation (not finished yet); SAD functions rewritten (8x8 support & MMX2 optimizations); HQ inter/intra decision; msmpeg4 encoding bugfix (MVs were too long)
author michaelni
date Wed, 27 Mar 2002 21:25:22 +0000
parents 4c1cec7c3c7c
children c1a8a1b4a24b
comparison
equal deleted inserted replaced
293:6eaf5da091fa 294:944632089814
33 33
34 op_pixels_abs_func pix_abs16x16; 34 op_pixels_abs_func pix_abs16x16;
35 op_pixels_abs_func pix_abs16x16_x2; 35 op_pixels_abs_func pix_abs16x16_x2;
36 op_pixels_abs_func pix_abs16x16_y2; 36 op_pixels_abs_func pix_abs16x16_y2;
37 op_pixels_abs_func pix_abs16x16_xy2; 37 op_pixels_abs_func pix_abs16x16_xy2;
38
39 op_pixels_abs_func pix_abs8x8;
40 op_pixels_abs_func pix_abs8x8_x2;
41 op_pixels_abs_func pix_abs8x8_y2;
42 op_pixels_abs_func pix_abs8x8_xy2;
38 43
39 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; 44 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
40 UINT32 squareTbl[512]; 45 UINT32 squareTbl[512];
41 46
42 extern UINT16 default_intra_matrix[64]; 47 extern UINT16 default_intra_matrix[64];
375 { 380 {
376 UINT8 *cm = cropTbl + MAX_NEG_CROP; 381 UINT8 *cm = cropTbl + MAX_NEG_CROP;
377 int i; 382 int i;
378 for(i=0; i<h; i++) 383 for(i=0; i<h; i++)
379 { 384 {
380 dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)]; 385 dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)];
381 dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)]; 386 dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)];
382 dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)]; 387 dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)];
383 dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)]; 388 dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)];
384 dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)]; 389 dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)];
385 dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)]; 390 dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)];
386 dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)]; 391 dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)];
387 dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)]; 392 dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)];
388 dst+=dstStride; 393 dst+=dstStride;
389 src+=srcStride; 394 src+=srcStride;
390 } 395 }
391 } 396 }
392 397
403 const int src4= src[4*srcStride]; 408 const int src4= src[4*srcStride];
404 const int src5= src[5*srcStride]; 409 const int src5= src[5*srcStride];
405 const int src6= src[6*srcStride]; 410 const int src6= src[6*srcStride];
406 const int src7= src[7*srcStride]; 411 const int src7= src[7*srcStride];
407 const int src8= src[8*srcStride]; 412 const int src8= src[8*srcStride];
408 dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)]; 413 dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)];
409 dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)]; 414 dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)];
410 dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)]; 415 dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)];
411 dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)]; 416 dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)];
412 dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)]; 417 dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)];
413 dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)]; 418 dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)];
414 dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)]; 419 dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)];
415 dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)]; 420 dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)];
416 dst++; 421 dst++;
417 src++; 422 src++;
418 } 423 }
419 } 424 }
420 425
483 }\ 488 }\
484 \ 489 \
485 static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 490 static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
486 {\ 491 {\
487 UINT8 half[64];\ 492 UINT8 half[64];\
488 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ 493 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
489 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ 494 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
490 }\ 495 }\
491 \ 496 \
492 static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 497 static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
493 {\ 498 {\
494 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ 499 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
495 }\ 500 }\
496 \ 501 \
497 static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 502 static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
498 {\ 503 {\
499 UINT8 half[64];\ 504 UINT8 half[64];\
500 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ 505 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
501 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ 506 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
502 }\ 507 }\
503 \ 508 \
504 static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 509 static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
505 {\ 510 {\
506 UINT8 half[64];\ 511 UINT8 half[64];\
507 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ 512 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
508 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ 513 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
509 }\ 514 }\
510 \ 515 \
511 static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 516 static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
512 {\ 517 {\
513 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ 518 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
514 }\ 519 }\
515 \ 520 \
516 static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 521 static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
517 {\ 522 {\
518 UINT8 half[64];\ 523 UINT8 half[64];\
519 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ 524 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
520 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ 525 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
521 }\ 526 }\
522 static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 527 static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
523 {\ 528 {\
524 UINT8 halfH[72];\ 529 UINT8 halfH[72];\
525 UINT8 halfV[64];\ 530 UINT8 halfV[64];\
526 UINT8 halfHV[64];\ 531 UINT8 halfHV[64];\
527 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 532 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
528 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ 533 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
529 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 534 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
530 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ 535 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
531 }\ 536 }\
532 static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 537 static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
533 {\ 538 {\
534 UINT8 halfH[72];\ 539 UINT8 halfH[72];\
535 UINT8 halfV[64];\ 540 UINT8 halfV[64];\
536 UINT8 halfHV[64];\ 541 UINT8 halfHV[64];\
537 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 542 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
538 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ 543 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
539 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 544 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
540 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ 545 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
541 }\ 546 }\
542 static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 547 static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
543 {\ 548 {\
544 UINT8 halfH[72];\ 549 UINT8 halfH[72];\
545 UINT8 halfV[64];\ 550 UINT8 halfV[64];\
546 UINT8 halfHV[64];\ 551 UINT8 halfHV[64];\
547 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 552 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
548 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ 553 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
549 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 554 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
550 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ 555 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
551 }\ 556 }\
552 static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 557 static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
553 {\ 558 {\
554 UINT8 halfH[72];\ 559 UINT8 halfH[72];\
555 UINT8 halfV[64];\ 560 UINT8 halfV[64];\
556 UINT8 halfHV[64];\ 561 UINT8 halfHV[64];\
557 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 562 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
558 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ 563 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
559 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 564 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
560 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ 565 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
561 }\ 566 }\
562 static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 567 static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
563 {\ 568 {\
564 UINT8 halfH[72];\ 569 UINT8 halfH[72];\
565 UINT8 halfHV[64];\ 570 UINT8 halfHV[64];\
566 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 571 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
567 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 572 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
568 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ 573 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
569 }\ 574 }\
570 static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 575 static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
571 {\ 576 {\
572 UINT8 halfH[72];\ 577 UINT8 halfH[72];\
573 UINT8 halfHV[64];\ 578 UINT8 halfHV[64];\
574 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 579 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
575 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 580 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
576 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ 581 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
577 }\ 582 }\
578 static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 583 static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
579 {\ 584 {\
580 UINT8 halfH[72];\ 585 UINT8 halfH[72];\
581 UINT8 halfV[64];\ 586 UINT8 halfV[64];\
582 UINT8 halfHV[64];\ 587 UINT8 halfHV[64];\
583 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 588 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
584 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ 589 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
585 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 590 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
586 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ 591 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
587 }\ 592 }\
588 static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 593 static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
589 {\ 594 {\
590 UINT8 halfH[72];\ 595 UINT8 halfH[72];\
591 UINT8 halfV[64];\ 596 UINT8 halfV[64];\
592 UINT8 halfHV[64];\ 597 UINT8 halfHV[64];\
593 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 598 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
594 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ 599 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
595 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ 600 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
596 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ 601 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
597 }\ 602 }\
598 static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ 603 static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
599 {\ 604 {\
600 UINT8 halfH[72];\ 605 UINT8 halfH[72];\
601 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ 606 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
602 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\ 607 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
603 }\ 608 }\
604 qpel_mc_func qpel_mc ## name ## _tab[16]={ \ 609 qpel_mc_func qpel_mc ## name ## _tab[16]={ \
605 qpel_mc00_c ## name, \ 610 qpel_mc00_c ## name, \
606 qpel_mc10_c ## name, \ 611 qpel_mc10_c ## name, \
607 qpel_mc20_c ## name, \ 612 qpel_mc20_c ## name, \
621 }; 626 };
622 627
623 QPEL_MC(0, _rnd) 628 QPEL_MC(0, _rnd)
624 QPEL_MC(1, _no_rnd) 629 QPEL_MC(1, _no_rnd)
625 630
626 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) 631 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
627 { 632 {
628 int s, i; 633 int s, i;
629 634
630 s = 0; 635 s = 0;
631 for(i=0;i<h;i++) { 636 for(i=0;i<16;i++) {
632 s += abs(pix1[0] - pix2[0]); 637 s += abs(pix1[0] - pix2[0]);
633 s += abs(pix1[1] - pix2[1]); 638 s += abs(pix1[1] - pix2[1]);
634 s += abs(pix1[2] - pix2[2]); 639 s += abs(pix1[2] - pix2[2]);
635 s += abs(pix1[3] - pix2[3]); 640 s += abs(pix1[3] - pix2[3]);
636 s += abs(pix1[4] - pix2[4]); 641 s += abs(pix1[4] - pix2[4]);
649 pix2 += line_size; 654 pix2 += line_size;
650 } 655 }
651 return s; 656 return s;
652 } 657 }
653 658
654 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) 659 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
655 { 660 {
656 int s, i; 661 int s, i;
657 662
658 s = 0; 663 s = 0;
659 for(i=0;i<h;i++) { 664 for(i=0;i<16;i++) {
660 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 665 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
661 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 666 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
662 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 667 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
663 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 668 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
664 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); 669 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
677 pix2 += line_size; 682 pix2 += line_size;
678 } 683 }
679 return s; 684 return s;
680 } 685 }
681 686
682 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) 687 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
683 { 688 {
684 int s, i; 689 int s, i;
685 UINT8 *pix3 = pix2 + line_size; 690 UINT8 *pix3 = pix2 + line_size;
686 691
687 s = 0; 692 s = 0;
688 for(i=0;i<h;i++) { 693 for(i=0;i<16;i++) {
689 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 694 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
690 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 695 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
691 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 696 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
692 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 697 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
693 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 698 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
707 pix3 += line_size; 712 pix3 += line_size;
708 } 713 }
709 return s; 714 return s;
710 } 715 }
711 716
712 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) 717 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
713 { 718 {
714 int s, i; 719 int s, i;
715 UINT8 *pix3 = pix2 + line_size; 720 UINT8 *pix3 = pix2 + line_size;
716 721
717 s = 0; 722 s = 0;
718 for(i=0;i<h;i++) { 723 for(i=0;i<16;i++) {
719 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 724 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
720 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 725 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
721 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 726 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
722 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 727 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
723 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 728 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
737 pix3 += line_size; 742 pix3 += line_size;
738 } 743 }
739 return s; 744 return s;
740 } 745 }
741 746
747 int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
748 {
749 int s, i;
750
751 s = 0;
752 for(i=0;i<8;i++) {
753 s += abs(pix1[0] - pix2[0]);
754 s += abs(pix1[1] - pix2[1]);
755 s += abs(pix1[2] - pix2[2]);
756 s += abs(pix1[3] - pix2[3]);
757 s += abs(pix1[4] - pix2[4]);
758 s += abs(pix1[5] - pix2[5]);
759 s += abs(pix1[6] - pix2[6]);
760 s += abs(pix1[7] - pix2[7]);
761 pix1 += line_size;
762 pix2 += line_size;
763 }
764 return s;
765 }
766
767 int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
768 {
769 int s, i;
770
771 s = 0;
772 for(i=0;i<8;i++) {
773 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
774 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
775 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
776 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
777 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
778 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
779 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
780 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
781 pix1 += line_size;
782 pix2 += line_size;
783 }
784 return s;
785 }
786
787 int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
788 {
789 int s, i;
790 UINT8 *pix3 = pix2 + line_size;
791
792 s = 0;
793 for(i=0;i<8;i++) {
794 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
795 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
796 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
797 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
798 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
799 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
800 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
801 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
802 pix1 += line_size;
803 pix2 += line_size;
804 pix3 += line_size;
805 }
806 return s;
807 }
808
809 int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
810 {
811 int s, i;
812 UINT8 *pix3 = pix2 + line_size;
813
814 s = 0;
815 for(i=0;i<8;i++) {
816 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
817 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
818 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
819 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
820 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
821 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
822 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
823 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
824 pix1 += line_size;
825 pix2 += line_size;
826 pix3 += line_size;
827 }
828 return s;
829 }
830
742 /* permute block according so that it corresponds to the MMX idct 831 /* permute block according so that it corresponds to the MMX idct
743 order */ 832 order */
744 #ifdef SIMPLE_IDCT 833 #ifdef SIMPLE_IDCT
745 /* general permutation, but perhaps slightly slower */ 834 /* general permutation, but perhaps slightly slower */
746 void block_permute(INT16 *block) 835 void block_permute(INT16 *block)
800 get_pixels = get_pixels_c; 889 get_pixels = get_pixels_c;
801 put_pixels_clamped = put_pixels_clamped_c; 890 put_pixels_clamped = put_pixels_clamped_c;
802 add_pixels_clamped = add_pixels_clamped_c; 891 add_pixels_clamped = add_pixels_clamped_c;
803 gmc1= gmc1_c; 892 gmc1= gmc1_c;
804 893
805 pix_abs16x16 = pix_abs16x16_c; 894 pix_abs16x16 = pix_abs16x16_c;
806 pix_abs16x16_x2 = pix_abs16x16_x2_c; 895 pix_abs16x16_x2 = pix_abs16x16_x2_c;
807 pix_abs16x16_y2 = pix_abs16x16_y2_c; 896 pix_abs16x16_y2 = pix_abs16x16_y2_c;
808 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; 897 pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
898 pix_abs8x8 = pix_abs8x8_c;
899 pix_abs8x8_x2 = pix_abs8x8_x2_c;
900 pix_abs8x8_y2 = pix_abs8x8_y2_c;
901 pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
809 av_fdct = jpeg_fdct_ifast; 902 av_fdct = jpeg_fdct_ifast;
810 903
811 use_permuted_idct = 1; 904 use_permuted_idct = 1;
812 905
813 #ifdef HAVE_MMX 906 #ifdef HAVE_MMX