Mercurial > libavcodec.hg
comparison dsputil.c @ 294:944632089814 libavcodec
4MV motion estimation (not finished yet)
SAD functions rewritten (8x8 support & MMX2 optimizations)
HQ inter/intra decision
msmpeg4 encoding bugfix (MVs were too long)
author | michaelni |
---|---|
date | Wed, 27 Mar 2002 21:25:22 +0000 |
parents | 4c1cec7c3c7c |
children | c1a8a1b4a24b |
comparison
equal
deleted
inserted
replaced
293:6eaf5da091fa | 294:944632089814 |
---|---|
33 | 33 |
34 op_pixels_abs_func pix_abs16x16; | 34 op_pixels_abs_func pix_abs16x16; |
35 op_pixels_abs_func pix_abs16x16_x2; | 35 op_pixels_abs_func pix_abs16x16_x2; |
36 op_pixels_abs_func pix_abs16x16_y2; | 36 op_pixels_abs_func pix_abs16x16_y2; |
37 op_pixels_abs_func pix_abs16x16_xy2; | 37 op_pixels_abs_func pix_abs16x16_xy2; |
38 | |
39 op_pixels_abs_func pix_abs8x8; | |
40 op_pixels_abs_func pix_abs8x8_x2; | |
41 op_pixels_abs_func pix_abs8x8_y2; | |
42 op_pixels_abs_func pix_abs8x8_xy2; | |
38 | 43 |
39 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | 44 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
40 UINT32 squareTbl[512]; | 45 UINT32 squareTbl[512]; |
41 | 46 |
42 extern UINT16 default_intra_matrix[64]; | 47 extern UINT16 default_intra_matrix[64]; |
375 { | 380 { |
376 UINT8 *cm = cropTbl + MAX_NEG_CROP; | 381 UINT8 *cm = cropTbl + MAX_NEG_CROP; |
377 int i; | 382 int i; |
378 for(i=0; i<h; i++) | 383 for(i=0; i<h; i++) |
379 { | 384 { |
380 dst[0]= cm[(((src[0]+src[1])*160 - (src[0]+src[2])*48 + (src[1]+src[3])*24 - (src[2]+src[4])*8 + r)>>8)]; | 385 dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)]; |
381 dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)]; | 386 dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)]; |
382 dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)]; | 387 dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)]; |
383 dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)]; | 388 dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)]; |
384 dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)]; | 389 dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)]; |
385 dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)]; | 390 dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)]; |
386 dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)]; | 391 dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)]; |
387 dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)]; | 392 dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)]; |
388 dst+=dstStride; | 393 dst+=dstStride; |
389 src+=srcStride; | 394 src+=srcStride; |
390 } | 395 } |
391 } | 396 } |
392 | 397 |
403 const int src4= src[4*srcStride]; | 408 const int src4= src[4*srcStride]; |
404 const int src5= src[5*srcStride]; | 409 const int src5= src[5*srcStride]; |
405 const int src6= src[6*srcStride]; | 410 const int src6= src[6*srcStride]; |
406 const int src7= src[7*srcStride]; | 411 const int src7= src[7*srcStride]; |
407 const int src8= src[8*srcStride]; | 412 const int src8= src[8*srcStride]; |
408 dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)]; | 413 dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)]; |
409 dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)]; | 414 dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)]; |
410 dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)]; | 415 dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)]; |
411 dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)]; | 416 dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)]; |
412 dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)]; | 417 dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)]; |
413 dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)]; | 418 dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)]; |
414 dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)]; | 419 dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)]; |
415 dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)]; | 420 dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)]; |
416 dst++; | 421 dst++; |
417 src++; | 422 src++; |
418 } | 423 } |
419 } | 424 } |
420 | 425 |
483 }\ | 488 }\ |
484 \ | 489 \ |
485 static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 490 static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
486 {\ | 491 {\ |
487 UINT8 half[64];\ | 492 UINT8 half[64];\ |
488 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ | 493 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
489 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ | 494 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ |
490 }\ | 495 }\ |
491 \ | 496 \ |
492 static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 497 static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
493 {\ | 498 {\ |
494 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ | 499 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ |
495 }\ | 500 }\ |
496 \ | 501 \ |
497 static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 502 static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
498 {\ | 503 {\ |
499 UINT8 half[64];\ | 504 UINT8 half[64];\ |
500 qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ | 505 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
501 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ | 506 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ |
502 }\ | 507 }\ |
503 \ | 508 \ |
504 static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 509 static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
505 {\ | 510 {\ |
506 UINT8 half[64];\ | 511 UINT8 half[64];\ |
507 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ | 512 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
508 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ | 513 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ |
509 }\ | 514 }\ |
510 \ | 515 \ |
511 static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 516 static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
512 {\ | 517 {\ |
513 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ | 518 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ |
514 }\ | 519 }\ |
515 \ | 520 \ |
516 static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 521 static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
517 {\ | 522 {\ |
518 UINT8 half[64];\ | 523 UINT8 half[64];\ |
519 qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ | 524 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
520 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ | 525 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ |
521 }\ | 526 }\ |
522 static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 527 static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
523 {\ | 528 {\ |
524 UINT8 halfH[72];\ | 529 UINT8 halfH[72];\ |
525 UINT8 halfV[64];\ | 530 UINT8 halfV[64];\ |
526 UINT8 halfHV[64];\ | 531 UINT8 halfHV[64];\ |
527 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 532 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
528 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ | 533 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ |
529 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 534 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
530 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ | 535 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ |
531 }\ | 536 }\ |
532 static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 537 static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
533 {\ | 538 {\ |
534 UINT8 halfH[72];\ | 539 UINT8 halfH[72];\ |
535 UINT8 halfV[64];\ | 540 UINT8 halfV[64];\ |
536 UINT8 halfHV[64];\ | 541 UINT8 halfHV[64];\ |
537 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 542 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
538 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ | 543 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ |
539 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 544 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
540 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ | 545 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ |
541 }\ | 546 }\ |
542 static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 547 static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
543 {\ | 548 {\ |
544 UINT8 halfH[72];\ | 549 UINT8 halfH[72];\ |
545 UINT8 halfV[64];\ | 550 UINT8 halfV[64];\ |
546 UINT8 halfHV[64];\ | 551 UINT8 halfHV[64];\ |
547 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 552 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
548 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ | 553 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ |
549 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 554 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
550 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ | 555 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ |
551 }\ | 556 }\ |
552 static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 557 static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
553 {\ | 558 {\ |
554 UINT8 halfH[72];\ | 559 UINT8 halfH[72];\ |
555 UINT8 halfV[64];\ | 560 UINT8 halfV[64];\ |
556 UINT8 halfHV[64];\ | 561 UINT8 halfHV[64];\ |
557 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 562 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
558 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ | 563 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ |
559 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 564 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
560 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ | 565 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ |
561 }\ | 566 }\ |
562 static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 567 static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
563 {\ | 568 {\ |
564 UINT8 halfH[72];\ | 569 UINT8 halfH[72];\ |
565 UINT8 halfHV[64];\ | 570 UINT8 halfHV[64];\ |
566 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 571 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
567 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 572 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
568 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ | 573 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ |
569 }\ | 574 }\ |
570 static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 575 static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
571 {\ | 576 {\ |
572 UINT8 halfH[72];\ | 577 UINT8 halfH[72];\ |
573 UINT8 halfHV[64];\ | 578 UINT8 halfHV[64];\ |
574 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 579 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
575 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 580 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
576 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ | 581 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ |
577 }\ | 582 }\ |
578 static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 583 static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
579 {\ | 584 {\ |
580 UINT8 halfH[72];\ | 585 UINT8 halfH[72];\ |
581 UINT8 halfV[64];\ | 586 UINT8 halfV[64];\ |
582 UINT8 halfHV[64];\ | 587 UINT8 halfHV[64];\ |
583 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 588 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
584 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ | 589 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ |
585 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 590 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
586 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ | 591 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ |
587 }\ | 592 }\ |
588 static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 593 static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
589 {\ | 594 {\ |
590 UINT8 halfH[72];\ | 595 UINT8 halfH[72];\ |
591 UINT8 halfV[64];\ | 596 UINT8 halfV[64];\ |
592 UINT8 halfHV[64];\ | 597 UINT8 halfHV[64];\ |
593 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 598 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
594 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ | 599 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ |
595 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ | 600 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ |
596 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ | 601 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ |
597 }\ | 602 }\ |
598 static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | 603 static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ |
599 {\ | 604 {\ |
600 UINT8 halfH[72];\ | 605 UINT8 halfH[72];\ |
601 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ | 606 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
602 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\ | 607 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\ |
603 }\ | 608 }\ |
604 qpel_mc_func qpel_mc ## name ## _tab[16]={ \ | 609 qpel_mc_func qpel_mc ## name ## _tab[16]={ \ |
605 qpel_mc00_c ## name, \ | 610 qpel_mc00_c ## name, \ |
606 qpel_mc10_c ## name, \ | 611 qpel_mc10_c ## name, \ |
607 qpel_mc20_c ## name, \ | 612 qpel_mc20_c ## name, \ |
621 }; | 626 }; |
622 | 627 |
623 QPEL_MC(0, _rnd) | 628 QPEL_MC(0, _rnd) |
624 QPEL_MC(1, _no_rnd) | 629 QPEL_MC(1, _no_rnd) |
625 | 630 |
626 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) | 631 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
627 { | 632 { |
628 int s, i; | 633 int s, i; |
629 | 634 |
630 s = 0; | 635 s = 0; |
631 for(i=0;i<h;i++) { | 636 for(i=0;i<16;i++) { |
632 s += abs(pix1[0] - pix2[0]); | 637 s += abs(pix1[0] - pix2[0]); |
633 s += abs(pix1[1] - pix2[1]); | 638 s += abs(pix1[1] - pix2[1]); |
634 s += abs(pix1[2] - pix2[2]); | 639 s += abs(pix1[2] - pix2[2]); |
635 s += abs(pix1[3] - pix2[3]); | 640 s += abs(pix1[3] - pix2[3]); |
636 s += abs(pix1[4] - pix2[4]); | 641 s += abs(pix1[4] - pix2[4]); |
649 pix2 += line_size; | 654 pix2 += line_size; |
650 } | 655 } |
651 return s; | 656 return s; |
652 } | 657 } |
653 | 658 |
654 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) | 659 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
655 { | 660 { |
656 int s, i; | 661 int s, i; |
657 | 662 |
658 s = 0; | 663 s = 0; |
659 for(i=0;i<h;i++) { | 664 for(i=0;i<16;i++) { |
660 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | 665 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
661 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | 666 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); |
662 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | 667 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); |
663 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | 668 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); |
664 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | 669 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); |
677 pix2 += line_size; | 682 pix2 += line_size; |
678 } | 683 } |
679 return s; | 684 return s; |
680 } | 685 } |
681 | 686 |
682 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) | 687 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
683 { | 688 { |
684 int s, i; | 689 int s, i; |
685 UINT8 *pix3 = pix2 + line_size; | 690 UINT8 *pix3 = pix2 + line_size; |
686 | 691 |
687 s = 0; | 692 s = 0; |
688 for(i=0;i<h;i++) { | 693 for(i=0;i<16;i++) { |
689 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | 694 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); |
690 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | 695 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); |
691 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | 696 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); |
692 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | 697 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); |
693 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | 698 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); |
707 pix3 += line_size; | 712 pix3 += line_size; |
708 } | 713 } |
709 return s; | 714 return s; |
710 } | 715 } |
711 | 716 |
712 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) | 717 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
713 { | 718 { |
714 int s, i; | 719 int s, i; |
715 UINT8 *pix3 = pix2 + line_size; | 720 UINT8 *pix3 = pix2 + line_size; |
716 | 721 |
717 s = 0; | 722 s = 0; |
718 for(i=0;i<h;i++) { | 723 for(i=0;i<16;i++) { |
719 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | 724 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); |
720 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | 725 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); |
721 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | 726 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); |
722 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | 727 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); |
723 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | 728 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); |
737 pix3 += line_size; | 742 pix3 += line_size; |
738 } | 743 } |
739 return s; | 744 return s; |
740 } | 745 } |
741 | 746 |
747 int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
748 { | |
749 int s, i; | |
750 | |
751 s = 0; | |
752 for(i=0;i<8;i++) { | |
753 s += abs(pix1[0] - pix2[0]); | |
754 s += abs(pix1[1] - pix2[1]); | |
755 s += abs(pix1[2] - pix2[2]); | |
756 s += abs(pix1[3] - pix2[3]); | |
757 s += abs(pix1[4] - pix2[4]); | |
758 s += abs(pix1[5] - pix2[5]); | |
759 s += abs(pix1[6] - pix2[6]); | |
760 s += abs(pix1[7] - pix2[7]); | |
761 pix1 += line_size; | |
762 pix2 += line_size; | |
763 } | |
764 return s; | |
765 } | |
766 | |
767 int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
768 { | |
769 int s, i; | |
770 | |
771 s = 0; | |
772 for(i=0;i<8;i++) { | |
773 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | |
774 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | |
775 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | |
776 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | |
777 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | |
778 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); | |
779 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); | |
780 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); | |
781 pix1 += line_size; | |
782 pix2 += line_size; | |
783 } | |
784 return s; | |
785 } | |
786 | |
787 int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
788 { | |
789 int s, i; | |
790 UINT8 *pix3 = pix2 + line_size; | |
791 | |
792 s = 0; | |
793 for(i=0;i<8;i++) { | |
794 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | |
795 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | |
796 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | |
797 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | |
798 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | |
799 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); | |
800 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); | |
801 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); | |
802 pix1 += line_size; | |
803 pix2 += line_size; | |
804 pix3 += line_size; | |
805 } | |
806 return s; | |
807 } | |
808 | |
809 int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
810 { | |
811 int s, i; | |
812 UINT8 *pix3 = pix2 + line_size; | |
813 | |
814 s = 0; | |
815 for(i=0;i<8;i++) { | |
816 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | |
817 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | |
818 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | |
819 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | |
820 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | |
821 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); | |
822 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); | |
823 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); | |
824 pix1 += line_size; | |
825 pix2 += line_size; | |
826 pix3 += line_size; | |
827 } | |
828 return s; | |
829 } | |
830 | |
742 /* permute block according so that it corresponds to the MMX idct | 831 /* permute block according so that it corresponds to the MMX idct |
743 order */ | 832 order */ |
744 #ifdef SIMPLE_IDCT | 833 #ifdef SIMPLE_IDCT |
745 /* general permutation, but perhaps slightly slower */ | 834 /* general permutation, but perhaps slightly slower */ |
746 void block_permute(INT16 *block) | 835 void block_permute(INT16 *block) |
800 get_pixels = get_pixels_c; | 889 get_pixels = get_pixels_c; |
801 put_pixels_clamped = put_pixels_clamped_c; | 890 put_pixels_clamped = put_pixels_clamped_c; |
802 add_pixels_clamped = add_pixels_clamped_c; | 891 add_pixels_clamped = add_pixels_clamped_c; |
803 gmc1= gmc1_c; | 892 gmc1= gmc1_c; |
804 | 893 |
805 pix_abs16x16 = pix_abs16x16_c; | 894 pix_abs16x16 = pix_abs16x16_c; |
806 pix_abs16x16_x2 = pix_abs16x16_x2_c; | 895 pix_abs16x16_x2 = pix_abs16x16_x2_c; |
807 pix_abs16x16_y2 = pix_abs16x16_y2_c; | 896 pix_abs16x16_y2 = pix_abs16x16_y2_c; |
808 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; | 897 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; |
898 pix_abs8x8 = pix_abs8x8_c; | |
899 pix_abs8x8_x2 = pix_abs8x8_x2_c; | |
900 pix_abs8x8_y2 = pix_abs8x8_y2_c; | |
901 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | |
809 av_fdct = jpeg_fdct_ifast; | 902 av_fdct = jpeg_fdct_ifast; |
810 | 903 |
811 use_permuted_idct = 1; | 904 use_permuted_idct = 1; |
812 | 905 |
813 #ifdef HAVE_MMX | 906 #ifdef HAVE_MMX |