comparison dsputil.c @ 1267:85b71f9f7450 libavcodec

moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
author michaelni
date Thu, 15 May 2003 23:30:03 +0000
parents 2fa34e615c76
children a979fab41ed8
comparison
equal deleted inserted replaced
1266:ec946cb74397 1267:85b71f9f7450
464 464
465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) 465 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
466 #else // 64 bit variant 466 #else // 64 bit variant
467 467
468 #define PIXOP2(OPNAME, OP) \ 468 #define PIXOP2(OPNAME, OP) \
469 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
470 int i;\
471 for(i=0; i<h; i++){\
472 OP(*((uint16_t*)(block )), LD16(pixels ));\
473 pixels+=line_size;\
474 block +=line_size;\
475 }\
476 }\
469 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 477 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
470 int i;\ 478 int i;\
471 for(i=0; i<h; i++){\ 479 for(i=0; i<h; i++){\
472 OP(*((uint32_t*)(block )), LD32(pixels ));\ 480 OP(*((uint32_t*)(block )), LD32(pixels ));\
473 pixels+=line_size;\ 481 pixels+=line_size;\
521 for(i=0; i<h; i++){\ 529 for(i=0; i<h; i++){\
522 uint32_t a,b;\ 530 uint32_t a,b;\
523 a= LD32(&src1[i*src_stride1 ]);\ 531 a= LD32(&src1[i*src_stride1 ]);\
524 b= LD32(&src2[i*src_stride2 ]);\ 532 b= LD32(&src2[i*src_stride2 ]);\
525 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 533 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
534 }\
535 }\
536 \
537 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
538 int src_stride1, int src_stride2, int h){\
539 int i;\
540 for(i=0; i<h; i++){\
541 uint32_t a,b;\
542 a= LD16(&src1[i*src_stride1 ]);\
543 b= LD16(&src2[i*src_stride2 ]);\
544 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
526 }\ 545 }\
527 }\ 546 }\
528 \ 547 \
529 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 548 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
530 int src_stride1, int src_stride2, int h){\ 549 int src_stride1, int src_stride2, int h){\
587 h1= ((c&0xFCFCFCFCUL)>>2)\ 606 h1= ((c&0xFCFCFCFCUL)>>2)\
588 + ((d&0xFCFCFCFCUL)>>2);\ 607 + ((d&0xFCFCFCFCUL)>>2);\
589 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 608 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
590 }\ 609 }\
591 }\ 610 }\
611 \
612 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
613 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
614 }\
615 \
616 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
617 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
618 }\
619 \
620 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
621 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
622 }\
623 \
624 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
625 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
626 }\
627 \
592 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 628 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
593 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 629 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
594 int i;\ 630 int i;\
595 for(i=0; i<h; i++){\ 631 for(i=0; i<h; i++){\
596 uint32_t a, b, c, d, l0, l1, h0, h1;\ 632 uint32_t a, b, c, d, l0, l1, h0, h1;\
631 }\ 667 }\
632 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 668 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
633 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 669 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
634 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 670 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
635 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 671 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
672 }\
673 \
674 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
675 {\
676 int i, a0, b0, a1, b1;\
677 a0= pixels[0];\
678 b0= pixels[1] + 2;\
679 a0 += b0;\
680 b0 += pixels[2];\
681 \
682 pixels+=line_size;\
683 for(i=0; i<h; i+=2){\
684 a1= pixels[0];\
685 b1= pixels[1];\
686 a1 += b1;\
687 b1 += pixels[2];\
688 \
689 block[0]= (a1+a0)>>2; /* FIXME non put */\
690 block[1]= (b1+b0)>>2;\
691 \
692 pixels+=line_size;\
693 block +=line_size;\
694 \
695 a0= pixels[0];\
696 b0= pixels[1] + 2;\
697 a0 += b0;\
698 b0 += pixels[2];\
699 \
700 block[0]= (a1+a0)>>2;\
701 block[1]= (b1+b0)>>2;\
702 pixels+=line_size;\
703 block +=line_size;\
704 }\
705 }\
706 \
707 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
708 {\
709 int i;\
710 const uint32_t a= LD32(pixels );\
711 const uint32_t b= LD32(pixels+1);\
712 uint32_t l0= (a&0x03030303UL)\
713 + (b&0x03030303UL)\
714 + 0x02020202UL;\
715 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
716 + ((b&0xFCFCFCFCUL)>>2);\
717 uint32_t l1,h1;\
718 \
719 pixels+=line_size;\
720 for(i=0; i<h; i+=2){\
721 uint32_t a= LD32(pixels );\
722 uint32_t b= LD32(pixels+1);\
723 l1= (a&0x03030303UL)\
724 + (b&0x03030303UL);\
725 h1= ((a&0xFCFCFCFCUL)>>2)\
726 + ((b&0xFCFCFCFCUL)>>2);\
727 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
728 pixels+=line_size;\
729 block +=line_size;\
730 a= LD32(pixels );\
731 b= LD32(pixels+1);\
732 l0= (a&0x03030303UL)\
733 + (b&0x03030303UL)\
734 + 0x02020202UL;\
735 h0= ((a&0xFCFCFCFCUL)>>2)\
736 + ((b&0xFCFCFCFCUL)>>2);\
737 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
738 pixels+=line_size;\
739 block +=line_size;\
740 }\
636 }\ 741 }\
637 \ 742 \
638 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 743 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
639 {\ 744 {\
640 int j;\ 745 int j;\
817 } 922 }
818 ox += dxy; 923 ox += dxy;
819 oy += dyy; 924 oy += dyy;
820 } 925 }
821 } 926 }
927
/**
 * Copy a width x height block from src to dst without any interpolation.
 *
 * Widths 2, 4, 8 and 16 are forwarded to the matching put_pixelsN_c routine.
 * Any other width is handled by a plain byte-wise copy so that dst is never
 * silently left unwritten.
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride byte distance between consecutive rows, used for both
 *               src and dst
 * @param width  block width in pixels
 * @param height number of rows to copy
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    default: {
        /* No specialised routine for this width: copy byte by byte. */
        int i, j;
        for (i=0; i < height; i++) {
            for (j=0; j < width; j++) {
                dst[j] = src[j];
            }
            src += stride;
            dst += stride;
        }
        break;
    }
    }
}
936
/**
 * Horizontal third-pel blend: each output pixel is 2/3 of the source pixel
 * plus 1/3 of its right-hand neighbour.
 *
 * The weighted sum (weights add up to 3, plus a rounding term of 1) is
 * divided by 3 through a multiply by 683 and a shift by 11
 * (683 / 2048 is approximately 1/3).
 *
 * @param dst    destination block
 * @param src    source block; one extra pixel to the right of each row is read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int sum = 2*src[x] + src[x+1] + 1;
            dst[x] = (683*sum) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
947
/**
 * Horizontal third-pel blend: each output pixel is 1/3 of the source pixel
 * plus 2/3 of its right-hand neighbour.
 *
 * The weighted sum (weights add up to 3, plus a rounding term of 1) is
 * divided by 3 through a multiply by 683 and a shift by 11
 * (683 / 2048 is approximately 1/3).
 *
 * @param dst    destination block
 * @param src    source block; one extra pixel to the right of each row is read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2*src[x+1] + 1;
            dst[x] = (683*sum) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
958
/**
 * Vertical third-pel blend: each output pixel is 2/3 of the source pixel
 * plus 1/3 of the pixel directly below it.
 *
 * The weighted sum (weights add up to 3, plus a rounding term of 1) is
 * divided by 3 through a multiply by 683 and a shift by 11
 * (683 / 2048 is approximately 1/3).
 *
 * @param dst    destination block
 * @param src    source block; one extra row below the block is read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int sum = 2*src[x] + src[x+stride] + 1;
            dst[x] = (683*sum) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
969
/**
 * Two-dimensional third-pel blend of a 2x2 neighbourhood with weights
 * 4 (current), 3 (right), 3 (below) and 2 (below-right).
 *
 * The weights add up to 12; the sum plus a rounding term of 6 is divided
 * by 12 through a multiply by 2731 and a shift by 15
 * (2731 / 32768 is approximately 1/12).
 *
 * @param dst    destination block
 * @param src    source block; one extra column and one extra row are read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int upper = 4*src[x]        + 3*src[x+1];
            const int lower = 3*src[x+stride] + 2*src[x+stride+1];
            dst[x] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
980
/**
 * Two-dimensional third-pel blend of a 2x2 neighbourhood with weights
 * 3 (current), 4 (right), 2 (below) and 3 (below-right).
 *
 * The weights add up to 12; the sum plus a rounding term of 6 is divided
 * by 12 through a multiply by 2731 and a shift by 15
 * (2731 / 32768 is approximately 1/12).
 *
 * @param dst    destination block
 * @param src    source block; one extra column and one extra row are read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int upper = 3*src[x]        + 4*src[x+1];
            const int lower = 2*src[x+stride] + 3*src[x+stride+1];
            dst[x] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
991
/**
 * Vertical third-pel blend: each output pixel is 1/3 of the source pixel
 * plus 2/3 of the pixel directly below it.
 *
 * The weighted sum (weights add up to 3, plus a rounding term of 1) is
 * divided by 3 through a multiply by 683 and a shift by 11
 * (683 / 2048 is approximately 1/3).
 *
 * @param dst    destination block
 * @param src    source block; one extra row below the block is read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2*src[x+stride] + 1;
            dst[x] = (683*sum) >> 11;
        }
        dst += stride;
        src += stride;
    }
}
1002
/**
 * Two-dimensional third-pel blend of a 2x2 neighbourhood with weights
 * 3 (current), 2 (right), 4 (below) and 3 (below-right).
 *
 * The weights add up to 12; the sum plus a rounding term of 6 is divided
 * by 12 through a multiply by 2731 and a shift by 15
 * (2731 / 32768 is approximately 1/12).
 *
 * @param dst    destination block
 * @param src    source block; one extra column and one extra row are read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int upper = 3*src[x]        + 2*src[x+1];
            const int lower = 4*src[x+stride] + 3*src[x+stride+1];
            dst[x] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
1013
/**
 * Two-dimensional third-pel blend of a 2x2 neighbourhood with weights
 * 2 (current), 3 (right), 3 (below) and 4 (below-right).
 *
 * The weights add up to 12; the sum plus a rounding term of 6 is divided
 * by 12 through a multiply by 2731 and a shift by 15
 * (2731 / 32768 is approximately 1/12).
 *
 * @param dst    destination block
 * @param src    source block; one extra column and one extra row are read
 * @param stride byte distance between rows, used for both src and dst
 * @param width  block width in pixels
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int upper = 2*src[x]        + 3*src[x+1];
            const int lower = 3*src[x+stride] + 4*src[x+stride+1];
            dst[x] = (2731*(upper + lower + 6)) >> 15;
        }
        dst += stride;
        src += stride;
    }
}
#if 0
/*
 * Currently compiled out.
 *
 * TPEL_WIDTH(width) generates, for one fixed block width, a set of nine
 * wrappers put_tpel_pixels<width>_mcXY_c(dst, src, stride, height) that
 * forward to the generic put_tpel_pixels_mcXY_c(dst, src, stride, width,
 * height) routines with the width filled in.
 *
 * Each wrapper body is a plain call; a leading "void" in front of the call
 * would turn it into an ill-formed declaration.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1045
822 #define H264_CHROMA_MC(OPNAME, OP)\ 1046 #define H264_CHROMA_MC(OPNAME, OP)\
823 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ 1047 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
824 const int A=(8-x)*(8-y);\ 1048 const int A=(8-x)*(8-y);\
825 const int B=( x)*(8-y);\ 1049 const int B=( x)*(8-y);\
826 const int C=(8-x)*( y);\ 1050 const int C=(8-x)*( y);\
2559 2783
2560 dspfunc(put, 0, 16); 2784 dspfunc(put, 0, 16);
2561 dspfunc(put_no_rnd, 0, 16); 2785 dspfunc(put_no_rnd, 0, 16);
2562 dspfunc(put, 1, 8); 2786 dspfunc(put, 1, 8);
2563 dspfunc(put_no_rnd, 1, 8); 2787 dspfunc(put_no_rnd, 1, 8);
2788 dspfunc(put, 2, 4);
2789 dspfunc(put, 3, 2);
2564 2790
2565 dspfunc(avg, 0, 16); 2791 dspfunc(avg, 0, 16);
2566 dspfunc(avg_no_rnd, 0, 16); 2792 dspfunc(avg_no_rnd, 0, 16);
2567 dspfunc(avg, 1, 8); 2793 dspfunc(avg, 1, 8);
2568 dspfunc(avg_no_rnd, 1, 8); 2794 dspfunc(avg_no_rnd, 1, 8);
2569 #undef dspfunc 2795 #undef dspfunc
2796
2797 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2798 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2799 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2800 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2801 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2802 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2803 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2804 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2805 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2570 2806
2571 #define dspfunc(PFX, IDX, NUM) \ 2807 #define dspfunc(PFX, IDX, NUM) \
2572 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ 2808 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2573 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ 2809 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2574 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ 2810 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2619 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; 2855 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2620 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; 2856 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2621 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; 2857 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2622 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; 2858 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2623 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; 2859 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2624 2860
2625 c->hadamard8_diff[0]= hadamard8_diff16_c; 2861 c->hadamard8_diff[0]= hadamard8_diff16_c;
2626 c->hadamard8_diff[1]= hadamard8_diff_c; 2862 c->hadamard8_diff[1]= hadamard8_diff_c;
2627 c->hadamard8_abs = hadamard8_abs_c; 2863 c->hadamard8_abs = hadamard8_abs_c;
2628 2864
2629 c->dct_sad[0]= dct_sad16x16_c; 2865 c->dct_sad[0]= dct_sad16x16_c;