Mercurial > libavcodec.hg
comparison dsputil.c @ 1267:85b71f9f7450 libavcodec
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
author | michaelni |
---|---|
date | Thu, 15 May 2003 23:30:03 +0000 |
parents | 2fa34e615c76 |
children | a979fab41ed8 |
comparison
equal
deleted
inserted
replaced
1266:ec946cb74397 | 1267:85b71f9f7450 |
---|---|
464 | 464 |
465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | 465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) |
466 #else // 64 bit variant | 466 #else // 64 bit variant |
467 | 467 |
468 #define PIXOP2(OPNAME, OP) \ | 468 #define PIXOP2(OPNAME, OP) \ |
469 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
470 int i;\ | |
471 for(i=0; i<h; i++){\ | |
472 OP(*((uint16_t*)(block )), LD16(pixels ));\ | |
473 pixels+=line_size;\ | |
474 block +=line_size;\ | |
475 }\ | |
476 }\ | |
469 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 477 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
470 int i;\ | 478 int i;\ |
471 for(i=0; i<h; i++){\ | 479 for(i=0; i<h; i++){\ |
472 OP(*((uint32_t*)(block )), LD32(pixels ));\ | 480 OP(*((uint32_t*)(block )), LD32(pixels ));\ |
473 pixels+=line_size;\ | 481 pixels+=line_size;\ |
521 for(i=0; i<h; i++){\ | 529 for(i=0; i<h; i++){\ |
522 uint32_t a,b;\ | 530 uint32_t a,b;\ |
523 a= LD32(&src1[i*src_stride1 ]);\ | 531 a= LD32(&src1[i*src_stride1 ]);\ |
524 b= LD32(&src2[i*src_stride2 ]);\ | 532 b= LD32(&src2[i*src_stride2 ]);\ |
525 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | 533 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
534 }\ | |
535 }\ | |
536 \ | |
537 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
538 int src_stride1, int src_stride2, int h){\ | |
539 int i;\ | |
540 for(i=0; i<h; i++){\ | |
541 uint32_t a,b;\ | |
542 a= LD16(&src1[i*src_stride1 ]);\ | |
543 b= LD16(&src2[i*src_stride2 ]);\ | |
544 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | |
526 }\ | 545 }\ |
527 }\ | 546 }\ |
528 \ | 547 \ |
529 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 548 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
530 int src_stride1, int src_stride2, int h){\ | 549 int src_stride1, int src_stride2, int h){\ |
587 h1= ((c&0xFCFCFCFCUL)>>2)\ | 606 h1= ((c&0xFCFCFCFCUL)>>2)\ |
588 + ((d&0xFCFCFCFCUL)>>2);\ | 607 + ((d&0xFCFCFCFCUL)>>2);\ |
589 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | 608 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
590 }\ | 609 }\ |
591 }\ | 610 }\ |
611 \ | |
612 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
613 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | |
614 }\ | |
615 \ | |
616 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
617 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | |
618 }\ | |
619 \ | |
620 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
621 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | |
622 }\ | |
623 \ | |
624 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
625 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | |
626 }\ | |
627 \ | |
592 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | 628 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
593 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 629 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
594 int i;\ | 630 int i;\ |
595 for(i=0; i<h; i++){\ | 631 for(i=0; i<h; i++){\ |
596 uint32_t a, b, c, d, l0, l1, h0, h1;\ | 632 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
631 }\ | 667 }\ |
632 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | 668 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
633 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 669 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
634 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | 670 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ |
635 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | 671 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ |
672 }\ | |
673 \ | |
674 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
675 {\ | |
676 int i, a0, b0, a1, b1;\ | |
677 a0= pixels[0];\ | |
678 b0= pixels[1] + 2;\ | |
679 a0 += b0;\ | |
680 b0 += pixels[2];\ | |
681 \ | |
682 pixels+=line_size;\ | |
683 for(i=0; i<h; i+=2){\ | |
684 a1= pixels[0];\ | |
685 b1= pixels[1];\ | |
686 a1 += b1;\ | |
687 b1 += pixels[2];\ | |
688 \ | |
689 block[0]= (a1+a0)>>2; /* FIXME non put */\ | |
690 block[1]= (b1+b0)>>2;\ | |
691 \ | |
692 pixels+=line_size;\ | |
693 block +=line_size;\ | |
694 \ | |
695 a0= pixels[0];\ | |
696 b0= pixels[1] + 2;\ | |
697 a0 += b0;\ | |
698 b0 += pixels[2];\ | |
699 \ | |
700 block[0]= (a1+a0)>>2;\ | |
701 block[1]= (b1+b0)>>2;\ | |
702 pixels+=line_size;\ | |
703 block +=line_size;\ | |
704 }\ | |
705 }\ | |
706 \ | |
707 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
708 {\ | |
709 int i;\ | |
710 const uint32_t a= LD32(pixels );\ | |
711 const uint32_t b= LD32(pixels+1);\ | |
712 uint32_t l0= (a&0x03030303UL)\ | |
713 + (b&0x03030303UL)\ | |
714 + 0x02020202UL;\ | |
715 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
716 + ((b&0xFCFCFCFCUL)>>2);\ | |
717 uint32_t l1,h1;\ | |
718 \ | |
719 pixels+=line_size;\ | |
720 for(i=0; i<h; i+=2){\ | |
721 uint32_t a= LD32(pixels );\ | |
722 uint32_t b= LD32(pixels+1);\ | |
723 l1= (a&0x03030303UL)\ | |
724 + (b&0x03030303UL);\ | |
725 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
726 + ((b&0xFCFCFCFCUL)>>2);\ | |
727 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
728 pixels+=line_size;\ | |
729 block +=line_size;\ | |
730 a= LD32(pixels );\ | |
731 b= LD32(pixels+1);\ | |
732 l0= (a&0x03030303UL)\ | |
733 + (b&0x03030303UL)\ | |
734 + 0x02020202UL;\ | |
735 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
736 + ((b&0xFCFCFCFCUL)>>2);\ | |
737 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
738 pixels+=line_size;\ | |
739 block +=line_size;\ | |
740 }\ | |
636 }\ | 741 }\ |
637 \ | 742 \ |
638 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 743 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
639 {\ | 744 {\ |
640 int j;\ | 745 int j;\ |
817 } | 922 } |
818 ox += dxy; | 923 ox += dxy; |
819 oy += dyy; | 924 oy += dyy; |
820 } | 925 } |
821 } | 926 } |
927 | |
/**
 * Zero-offset case of the put_tpel family: no interpolation is done here,
 * the call is forwarded to the fixed-width put_pixelsN_c routine that
 * matches width.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride passed through unchanged as the line size of both buffers
 * @param width  block width; only 2, 4, 8 and 16 are handled, any other
 *               value leaves dst untouched
 * @param height number of rows, passed through to the helper
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2) {
        put_pixels2_c(dst, src, stride, height);
    } else if (width == 4) {
        put_pixels4_c(dst, src, stride, height);
    } else if (width == 8) {
        put_pixels8_c(dst, src, stride, height);
    } else if (width == 16) {
        put_pixels16_c(dst, src, stride, height);
    }
}
936 | |
/**
 * Horizontal two-tap filter weighted 2:1 towards the left sample.
 *
 * Every output is (2*src[x] + src[x+1] + 1) * 683 >> 11; 683/2048 is a
 * fixed-point stand-in for 1/3. One sample to the right of the last
 * column is read on each row.
 *
 * @param dst    destination, advanced by stride after each row
 * @param src    source, advanced by stride after each row
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left;
    for (rows_left = height; rows_left > 0; rows_left--) {
        int x;
        for (x = 0; x < width; x++) {
            const int sum = 2*src[x] + src[x+1] + 1;
            dst[x] = (683*sum) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
947 | |
/**
 * Horizontal two-tap filter weighted 1:2 towards the right sample.
 *
 * Every output is (src[x] + 2*src[x+1] + 1) * 683 >> 11; 683/2048 is a
 * fixed-point stand-in for 1/3. One sample to the right of the last
 * column is read on each row.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        const uint8_t *in  = src;
        uint8_t       *out = dst;
        int x = 0;
        while (x < width) {
            out[x] = (683*(in[x] + 2*in[x+1] + 1)) >> 11;
            x++;
        }
        src += stride;
        dst += stride;
    }
}
958 | |
/**
 * Vertical two-tap filter weighted 2:1 towards the upper sample.
 *
 * Every output is (2*src[x] + src[x+stride] + 1) * 683 >> 11; 683/2048 is
 * a fixed-point stand-in for 1/3. The row below the last output row is
 * read as well.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        const uint8_t *upper = src;
        const uint8_t *lower = src + stride;
        int x;
        for (x = 0; x < width; x++) {
            dst[x] = (683*(2*upper[x] + lower[x] + 1)) >> 11;
        }
        src = lower;
        dst += stride;
    }
}
969 | |
/**
 * Four-tap 2x2 filter with weights 4/3/3/2, heaviest on the top-left
 * sample.
 *
 * Every output is
 *   (4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)
 *   * 2731 >> 15
 * where 2731/32768 is a fixed-point stand-in for 1/12 (the weights sum
 * to 12). Reads one extra column on the right and one extra row below.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        const uint8_t *top    = src;
        const uint8_t *bottom = src + stride;
        int x;
        for (x = 0; x < width; x++) {
            const int acc = 4*top[x] + 3*top[x+1] + 3*bottom[x] + 2*bottom[x+1] + 6;
            dst[x] = (2731*acc) >> 15;
        }
        src = bottom;
        dst += stride;
    }
}
980 | |
/**
 * Four-tap 2x2 filter for horizontal offset 1, vertical offset 2
 * (in tpel units), weights 3/2/4/3 with the heaviest tap on the sample
 * directly below.
 *
 * Naming follows the rest of the family: the first digit is the horizontal
 * offset (mc10/mc20 filter along the row), the second the vertical one
 * (mc01/mc02 filter along the column). This function is stored in
 * put_tpel_pixels_tab[9], the slot that pairs horizontal 1 with vertical 2
 * in the same layout that puts mc10 at [1] and mc02 at [8]. At that
 * position the nearest full sample is src[x+stride], so it must carry the
 * weight of 4. The previous body put the 4 on src[x+1], which is the
 * filter for the mirrored position.
 *
 * Every output is
 *   (3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)
 *   * 2731 >> 15
 * where 2731/32768 is a fixed-point stand-in for 1/12. Reads one extra
 * column on the right and one extra row below.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
991 | |
/**
 * Vertical two-tap filter weighted 1:2 towards the lower sample.
 *
 * Every output is (src[x] + 2*src[x+stride] + 1) * 683 >> 11; 683/2048 is
 * a fixed-point stand-in for 1/3. The row below the last output row is
 * read as well.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows_left = height;
    while (rows_left-- > 0) {
        int x;
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2*src[x+stride] + 1;
            dst[x] = (683*sum) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
1002 | |
/**
 * Four-tap 2x2 filter for horizontal offset 2, vertical offset 1
 * (in tpel units), weights 3/4/2/3 with the heaviest tap on the sample
 * directly to the right.
 *
 * Naming follows the rest of the family: the first digit is the horizontal
 * offset (mc10/mc20 filter along the row), the second the vertical one
 * (mc01/mc02 filter along the column). This function is stored in
 * put_tpel_pixels_tab[6], the slot that pairs horizontal 2 with vertical 1
 * in the same layout that puts mc20 at [2] and mc01 at [4]. At that
 * position the nearest full sample is src[x+1], so it must carry the
 * weight of 4. The previous body put the 4 on src[x+stride], which is the
 * filter for the mirrored position.
 *
 * Every output is
 *   (3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)
 *   * 2731 >> 15
 * where 2731/32768 is a fixed-point stand-in for 1/12. Reads one extra
 * column on the right and one extra row below.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
1013 | |
/**
 * Four-tap 2x2 filter with weights 2/3/3/4, heaviest on the bottom-right
 * sample.
 *
 * Every output is
 *   (2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)
 *   * 2731 >> 15
 * where 2731/32768 is a fixed-point stand-in for 1/12 (the weights sum
 * to 12). Reads one extra column on the right and one extra row below.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride distance between rows, shared by dst and src
 * @param width  number of output columns per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        const uint8_t *top    = src;
        const uint8_t *bottom = src + stride;
        int x;
        for (x = 0; x < width; x++) {
            const int acc = 2*top[x] + 3*top[x+1] + 3*bottom[x] + 4*bottom[x+1] + 6;
            dst[x] = (2731*acc) >> 15;
        }
        src = bottom;
        dst += stride;
    }
}
#if 0
/*
 * Disabled. TPEL_WIDTH(width) would emit one fixed-width wrapper per
 * put_tpel_pixels_mcXY_c helper, each with a (dst, src, stride, height)
 * signature that passes width through as a constant.
 * The wrapper bodies previously began with a stray "void" in front of the
 * call, which is not valid C; it is removed so the block compiles if it is
 * ever enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1045 | |
822 #define H264_CHROMA_MC(OPNAME, OP)\ | 1046 #define H264_CHROMA_MC(OPNAME, OP)\ |
823 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | 1047 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ |
824 const int A=(8-x)*(8-y);\ | 1048 const int A=(8-x)*(8-y);\ |
825 const int B=( x)*(8-y);\ | 1049 const int B=( x)*(8-y);\ |
826 const int C=(8-x)*( y);\ | 1050 const int C=(8-x)*( y);\ |
2559 | 2783 |
2560 dspfunc(put, 0, 16); | 2784 dspfunc(put, 0, 16); |
2561 dspfunc(put_no_rnd, 0, 16); | 2785 dspfunc(put_no_rnd, 0, 16); |
2562 dspfunc(put, 1, 8); | 2786 dspfunc(put, 1, 8); |
2563 dspfunc(put_no_rnd, 1, 8); | 2787 dspfunc(put_no_rnd, 1, 8); |
2788 dspfunc(put, 2, 4); | |
2789 dspfunc(put, 3, 2); | |
2564 | 2790 |
2565 dspfunc(avg, 0, 16); | 2791 dspfunc(avg, 0, 16); |
2566 dspfunc(avg_no_rnd, 0, 16); | 2792 dspfunc(avg_no_rnd, 0, 16); |
2567 dspfunc(avg, 1, 8); | 2793 dspfunc(avg, 1, 8); |
2568 dspfunc(avg_no_rnd, 1, 8); | 2794 dspfunc(avg_no_rnd, 1, 8); |
2569 #undef dspfunc | 2795 #undef dspfunc |
2796 | |
2797 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; | |
2798 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; | |
2799 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; | |
2800 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; | |
2801 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; | |
2802 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; | |
2803 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; | |
2804 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; | |
2805 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; | |
2570 | 2806 |
2571 #define dspfunc(PFX, IDX, NUM) \ | 2807 #define dspfunc(PFX, IDX, NUM) \ |
2572 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | 2808 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ |
2573 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | 2809 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ |
2574 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | 2810 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ |
2619 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | 2855 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; |
2620 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | 2856 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; |
2621 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | 2857 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; |
2622 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | 2858 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; |
2623 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | 2859 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; |
2624 | 2860 |
2625 c->hadamard8_diff[0]= hadamard8_diff16_c; | 2861 c->hadamard8_diff[0]= hadamard8_diff16_c; |
2626 c->hadamard8_diff[1]= hadamard8_diff_c; | 2862 c->hadamard8_diff[1]= hadamard8_diff_c; |
2627 c->hadamard8_abs = hadamard8_abs_c; | 2863 c->hadamard8_abs = hadamard8_abs_c; |
2628 | 2864 |
2629 c->dct_sad[0]= dct_sad16x16_c; | 2865 c->dct_sad[0]= dct_sad16x16_c; |