comparison dsputil.c @ 5520:c16a59ef6a86 libavcodec

* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
author romansh
date Thu, 09 Aug 2007 17:01:15 +0000
parents 362aec4ef932
children b0a566346fb1
comparison
equal deleted inserted replaced
5519:b790f8c0ee24 5520:c16a59ef6a86
606 #define PIXOP2(OPNAME, OP) \ 606 #define PIXOP2(OPNAME, OP) \
607 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 607 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
608 {\ 608 {\
609 int i;\ 609 int i;\
610 for(i=0; i<h; i++){\ 610 for(i=0; i<h; i++){\
611 OP(*((uint64_t*)block), LD64(pixels));\ 611 OP(*((uint64_t*)block), AV_RN64(pixels));\
612 pixels+=line_size;\ 612 pixels+=line_size;\
613 block +=line_size;\ 613 block +=line_size;\
614 }\ 614 }\
615 }\ 615 }\
616 \ 616 \
617 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 617 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
618 {\ 618 {\
619 int i;\ 619 int i;\
620 for(i=0; i<h; i++){\ 620 for(i=0; i<h; i++){\
621 const uint64_t a= LD64(pixels );\ 621 const uint64_t a= AV_RN64(pixels );\
622 const uint64_t b= LD64(pixels+1);\ 622 const uint64_t b= AV_RN64(pixels+1);\
623 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 623 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
624 pixels+=line_size;\ 624 pixels+=line_size;\
625 block +=line_size;\ 625 block +=line_size;\
626 }\ 626 }\
627 }\ 627 }\
628 \ 628 \
629 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 629 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
630 {\ 630 {\
631 int i;\ 631 int i;\
632 for(i=0; i<h; i++){\ 632 for(i=0; i<h; i++){\
633 const uint64_t a= LD64(pixels );\ 633 const uint64_t a= AV_RN64(pixels );\
634 const uint64_t b= LD64(pixels+1);\ 634 const uint64_t b= AV_RN64(pixels+1);\
635 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 635 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
636 pixels+=line_size;\ 636 pixels+=line_size;\
637 block +=line_size;\ 637 block +=line_size;\
638 }\ 638 }\
639 }\ 639 }\
640 \ 640 \
641 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 641 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
642 {\ 642 {\
643 int i;\ 643 int i;\
644 for(i=0; i<h; i++){\ 644 for(i=0; i<h; i++){\
645 const uint64_t a= LD64(pixels );\ 645 const uint64_t a= AV_RN64(pixels );\
646 const uint64_t b= LD64(pixels+line_size);\ 646 const uint64_t b= AV_RN64(pixels+line_size);\
647 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 647 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
648 pixels+=line_size;\ 648 pixels+=line_size;\
649 block +=line_size;\ 649 block +=line_size;\
650 }\ 650 }\
651 }\ 651 }\
652 \ 652 \
653 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 653 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
654 {\ 654 {\
655 int i;\ 655 int i;\
656 for(i=0; i<h; i++){\ 656 for(i=0; i<h; i++){\
657 const uint64_t a= LD64(pixels );\ 657 const uint64_t a= AV_RN64(pixels );\
658 const uint64_t b= LD64(pixels+line_size);\ 658 const uint64_t b= AV_RN64(pixels+line_size);\
659 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 659 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
660 pixels+=line_size;\ 660 pixels+=line_size;\
661 block +=line_size;\ 661 block +=line_size;\
662 }\ 662 }\
663 }\ 663 }\
664 \ 664 \
665 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 665 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
666 {\ 666 {\
667 int i;\ 667 int i;\
668 const uint64_t a= LD64(pixels );\ 668 const uint64_t a= AV_RN64(pixels );\
669 const uint64_t b= LD64(pixels+1);\ 669 const uint64_t b= AV_RN64(pixels+1);\
670 uint64_t l0= (a&0x0303030303030303ULL)\ 670 uint64_t l0= (a&0x0303030303030303ULL)\
671 + (b&0x0303030303030303ULL)\ 671 + (b&0x0303030303030303ULL)\
672 + 0x0202020202020202ULL;\ 672 + 0x0202020202020202ULL;\
673 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 673 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
674 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 674 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
675 uint64_t l1,h1;\ 675 uint64_t l1,h1;\
676 \ 676 \
677 pixels+=line_size;\ 677 pixels+=line_size;\
678 for(i=0; i<h; i+=2){\ 678 for(i=0; i<h; i+=2){\
679 uint64_t a= LD64(pixels );\ 679 uint64_t a= AV_RN64(pixels );\
680 uint64_t b= LD64(pixels+1);\ 680 uint64_t b= AV_RN64(pixels+1);\
681 l1= (a&0x0303030303030303ULL)\ 681 l1= (a&0x0303030303030303ULL)\
682 + (b&0x0303030303030303ULL);\ 682 + (b&0x0303030303030303ULL);\
683 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 683 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
684 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 684 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
685 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 685 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
686 pixels+=line_size;\ 686 pixels+=line_size;\
687 block +=line_size;\ 687 block +=line_size;\
688 a= LD64(pixels );\ 688 a= AV_RN64(pixels );\
689 b= LD64(pixels+1);\ 689 b= AV_RN64(pixels+1);\
690 l0= (a&0x0303030303030303ULL)\ 690 l0= (a&0x0303030303030303ULL)\
691 + (b&0x0303030303030303ULL)\ 691 + (b&0x0303030303030303ULL)\
692 + 0x0202020202020202ULL;\ 692 + 0x0202020202020202ULL;\
693 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 693 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
694 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 694 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
699 }\ 699 }\
700 \ 700 \
701 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 701 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
702 {\ 702 {\
703 int i;\ 703 int i;\
704 const uint64_t a= LD64(pixels );\ 704 const uint64_t a= AV_RN64(pixels );\
705 const uint64_t b= LD64(pixels+1);\ 705 const uint64_t b= AV_RN64(pixels+1);\
706 uint64_t l0= (a&0x0303030303030303ULL)\ 706 uint64_t l0= (a&0x0303030303030303ULL)\
707 + (b&0x0303030303030303ULL)\ 707 + (b&0x0303030303030303ULL)\
708 + 0x0101010101010101ULL;\ 708 + 0x0101010101010101ULL;\
709 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 709 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
710 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 710 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
711 uint64_t l1,h1;\ 711 uint64_t l1,h1;\
712 \ 712 \
713 pixels+=line_size;\ 713 pixels+=line_size;\
714 for(i=0; i<h; i+=2){\ 714 for(i=0; i<h; i+=2){\
715 uint64_t a= LD64(pixels );\ 715 uint64_t a= AV_RN64(pixels );\
716 uint64_t b= LD64(pixels+1);\ 716 uint64_t b= AV_RN64(pixels+1);\
717 l1= (a&0x0303030303030303ULL)\ 717 l1= (a&0x0303030303030303ULL)\
718 + (b&0x0303030303030303ULL);\ 718 + (b&0x0303030303030303ULL);\
719 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 719 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
720 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 720 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
721 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 721 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
722 pixels+=line_size;\ 722 pixels+=line_size;\
723 block +=line_size;\ 723 block +=line_size;\
724 a= LD64(pixels );\ 724 a= AV_RN64(pixels );\
725 b= LD64(pixels+1);\ 725 b= AV_RN64(pixels+1);\
726 l0= (a&0x0303030303030303ULL)\ 726 l0= (a&0x0303030303030303ULL)\
727 + (b&0x0303030303030303ULL)\ 727 + (b&0x0303030303030303ULL)\
728 + 0x0101010101010101ULL;\ 728 + 0x0101010101010101ULL;\
729 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 729 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
730 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 730 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
747 747
748 #define PIXOP2(OPNAME, OP) \ 748 #define PIXOP2(OPNAME, OP) \
749 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 749 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
750 int i;\ 750 int i;\
751 for(i=0; i<h; i++){\ 751 for(i=0; i<h; i++){\
752 OP(*((uint16_t*)(block )), LD16(pixels ));\ 752 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
753 pixels+=line_size;\ 753 pixels+=line_size;\
754 block +=line_size;\ 754 block +=line_size;\
755 }\ 755 }\
756 }\ 756 }\
757 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 757 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
758 int i;\ 758 int i;\
759 for(i=0; i<h; i++){\ 759 for(i=0; i<h; i++){\
760 OP(*((uint32_t*)(block )), LD32(pixels ));\ 760 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
761 pixels+=line_size;\ 761 pixels+=line_size;\
762 block +=line_size;\ 762 block +=line_size;\
763 }\ 763 }\
764 }\ 764 }\
765 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 765 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
766 int i;\ 766 int i;\
767 for(i=0; i<h; i++){\ 767 for(i=0; i<h; i++){\
768 OP(*((uint32_t*)(block )), LD32(pixels ));\ 768 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
769 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ 769 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
770 pixels+=line_size;\ 770 pixels+=line_size;\
771 block +=line_size;\ 771 block +=line_size;\
772 }\ 772 }\
773 }\ 773 }\
774 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 774 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
778 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 778 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
779 int src_stride1, int src_stride2, int h){\ 779 int src_stride1, int src_stride2, int h){\
780 int i;\ 780 int i;\
781 for(i=0; i<h; i++){\ 781 for(i=0; i<h; i++){\
782 uint32_t a,b;\ 782 uint32_t a,b;\
783 a= LD32(&src1[i*src_stride1 ]);\ 783 a= AV_RN32(&src1[i*src_stride1 ]);\
784 b= LD32(&src2[i*src_stride2 ]);\ 784 b= AV_RN32(&src2[i*src_stride2 ]);\
785 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ 785 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
786 a= LD32(&src1[i*src_stride1+4]);\ 786 a= AV_RN32(&src1[i*src_stride1+4]);\
787 b= LD32(&src2[i*src_stride2+4]);\ 787 b= AV_RN32(&src2[i*src_stride2+4]);\
788 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ 788 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
789 }\ 789 }\
790 }\ 790 }\
791 \ 791 \
792 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 792 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
793 int src_stride1, int src_stride2, int h){\ 793 int src_stride1, int src_stride2, int h){\
794 int i;\ 794 int i;\
795 for(i=0; i<h; i++){\ 795 for(i=0; i<h; i++){\
796 uint32_t a,b;\ 796 uint32_t a,b;\
797 a= LD32(&src1[i*src_stride1 ]);\ 797 a= AV_RN32(&src1[i*src_stride1 ]);\
798 b= LD32(&src2[i*src_stride2 ]);\ 798 b= AV_RN32(&src2[i*src_stride2 ]);\
799 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 799 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
800 a= LD32(&src1[i*src_stride1+4]);\ 800 a= AV_RN32(&src1[i*src_stride1+4]);\
801 b= LD32(&src2[i*src_stride2+4]);\ 801 b= AV_RN32(&src2[i*src_stride2+4]);\
802 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ 802 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
803 }\ 803 }\
804 }\ 804 }\
805 \ 805 \
806 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 806 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
807 int src_stride1, int src_stride2, int h){\ 807 int src_stride1, int src_stride2, int h){\
808 int i;\ 808 int i;\
809 for(i=0; i<h; i++){\ 809 for(i=0; i<h; i++){\
810 uint32_t a,b;\ 810 uint32_t a,b;\
811 a= LD32(&src1[i*src_stride1 ]);\ 811 a= AV_RN32(&src1[i*src_stride1 ]);\
812 b= LD32(&src2[i*src_stride2 ]);\ 812 b= AV_RN32(&src2[i*src_stride2 ]);\
813 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 813 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
814 }\ 814 }\
815 }\ 815 }\
816 \ 816 \
817 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 817 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
818 int src_stride1, int src_stride2, int h){\ 818 int src_stride1, int src_stride2, int h){\
819 int i;\ 819 int i;\
820 for(i=0; i<h; i++){\ 820 for(i=0; i<h; i++){\
821 uint32_t a,b;\ 821 uint32_t a,b;\
822 a= LD16(&src1[i*src_stride1 ]);\ 822 a= AV_RN16(&src1[i*src_stride1 ]);\
823 b= LD16(&src2[i*src_stride2 ]);\ 823 b= AV_RN16(&src2[i*src_stride2 ]);\
824 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ 824 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
825 }\ 825 }\
826 }\ 826 }\
827 \ 827 \
828 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 828 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
856 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 856 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
857 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 857 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
858 int i;\ 858 int i;\
859 for(i=0; i<h; i++){\ 859 for(i=0; i<h; i++){\
860 uint32_t a, b, c, d, l0, l1, h0, h1;\ 860 uint32_t a, b, c, d, l0, l1, h0, h1;\
861 a= LD32(&src1[i*src_stride1]);\ 861 a= AV_RN32(&src1[i*src_stride1]);\
862 b= LD32(&src2[i*src_stride2]);\ 862 b= AV_RN32(&src2[i*src_stride2]);\
863 c= LD32(&src3[i*src_stride3]);\ 863 c= AV_RN32(&src3[i*src_stride3]);\
864 d= LD32(&src4[i*src_stride4]);\ 864 d= AV_RN32(&src4[i*src_stride4]);\
865 l0= (a&0x03030303UL)\ 865 l0= (a&0x03030303UL)\
866 + (b&0x03030303UL)\ 866 + (b&0x03030303UL)\
867 + 0x02020202UL;\ 867 + 0x02020202UL;\
868 h0= ((a&0xFCFCFCFCUL)>>2)\ 868 h0= ((a&0xFCFCFCFCUL)>>2)\
869 + ((b&0xFCFCFCFCUL)>>2);\ 869 + ((b&0xFCFCFCFCUL)>>2);\
870 l1= (c&0x03030303UL)\ 870 l1= (c&0x03030303UL)\
871 + (d&0x03030303UL);\ 871 + (d&0x03030303UL);\
872 h1= ((c&0xFCFCFCFCUL)>>2)\ 872 h1= ((c&0xFCFCFCFCUL)>>2)\
873 + ((d&0xFCFCFCFCUL)>>2);\ 873 + ((d&0xFCFCFCFCUL)>>2);\
874 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 874 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
875 a= LD32(&src1[i*src_stride1+4]);\ 875 a= AV_RN32(&src1[i*src_stride1+4]);\
876 b= LD32(&src2[i*src_stride2+4]);\ 876 b= AV_RN32(&src2[i*src_stride2+4]);\
877 c= LD32(&src3[i*src_stride3+4]);\ 877 c= AV_RN32(&src3[i*src_stride3+4]);\
878 d= LD32(&src4[i*src_stride4+4]);\ 878 d= AV_RN32(&src4[i*src_stride4+4]);\
879 l0= (a&0x03030303UL)\ 879 l0= (a&0x03030303UL)\
880 + (b&0x03030303UL)\ 880 + (b&0x03030303UL)\
881 + 0x02020202UL;\ 881 + 0x02020202UL;\
882 h0= ((a&0xFCFCFCFCUL)>>2)\ 882 h0= ((a&0xFCFCFCFCUL)>>2)\
883 + ((b&0xFCFCFCFCUL)>>2);\ 883 + ((b&0xFCFCFCFCUL)>>2);\
908 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ 908 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
909 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 909 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
910 int i;\ 910 int i;\
911 for(i=0; i<h; i++){\ 911 for(i=0; i<h; i++){\
912 uint32_t a, b, c, d, l0, l1, h0, h1;\ 912 uint32_t a, b, c, d, l0, l1, h0, h1;\
913 a= LD32(&src1[i*src_stride1]);\ 913 a= AV_RN32(&src1[i*src_stride1]);\
914 b= LD32(&src2[i*src_stride2]);\ 914 b= AV_RN32(&src2[i*src_stride2]);\
915 c= LD32(&src3[i*src_stride3]);\ 915 c= AV_RN32(&src3[i*src_stride3]);\
916 d= LD32(&src4[i*src_stride4]);\ 916 d= AV_RN32(&src4[i*src_stride4]);\
917 l0= (a&0x03030303UL)\ 917 l0= (a&0x03030303UL)\
918 + (b&0x03030303UL)\ 918 + (b&0x03030303UL)\
919 + 0x01010101UL;\ 919 + 0x01010101UL;\
920 h0= ((a&0xFCFCFCFCUL)>>2)\ 920 h0= ((a&0xFCFCFCFCUL)>>2)\
921 + ((b&0xFCFCFCFCUL)>>2);\ 921 + ((b&0xFCFCFCFCUL)>>2);\
922 l1= (c&0x03030303UL)\ 922 l1= (c&0x03030303UL)\
923 + (d&0x03030303UL);\ 923 + (d&0x03030303UL);\
924 h1= ((c&0xFCFCFCFCUL)>>2)\ 924 h1= ((c&0xFCFCFCFCUL)>>2)\
925 + ((d&0xFCFCFCFCUL)>>2);\ 925 + ((d&0xFCFCFCFCUL)>>2);\
926 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 926 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
927 a= LD32(&src1[i*src_stride1+4]);\ 927 a= AV_RN32(&src1[i*src_stride1+4]);\
928 b= LD32(&src2[i*src_stride2+4]);\ 928 b= AV_RN32(&src2[i*src_stride2+4]);\
929 c= LD32(&src3[i*src_stride3+4]);\ 929 c= AV_RN32(&src3[i*src_stride3+4]);\
930 d= LD32(&src4[i*src_stride4+4]);\ 930 d= AV_RN32(&src4[i*src_stride4+4]);\
931 l0= (a&0x03030303UL)\ 931 l0= (a&0x03030303UL)\
932 + (b&0x03030303UL)\ 932 + (b&0x03030303UL)\
933 + 0x01010101UL;\ 933 + 0x01010101UL;\
934 h0= ((a&0xFCFCFCFCUL)>>2)\ 934 h0= ((a&0xFCFCFCFCUL)>>2)\
935 + ((b&0xFCFCFCFCUL)>>2);\ 935 + ((b&0xFCFCFCFCUL)>>2);\
985 }\ 985 }\
986 \ 986 \
987 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 987 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
988 {\ 988 {\
989 int i;\ 989 int i;\
990 const uint32_t a= LD32(pixels );\ 990 const uint32_t a= AV_RN32(pixels );\
991 const uint32_t b= LD32(pixels+1);\ 991 const uint32_t b= AV_RN32(pixels+1);\
992 uint32_t l0= (a&0x03030303UL)\ 992 uint32_t l0= (a&0x03030303UL)\
993 + (b&0x03030303UL)\ 993 + (b&0x03030303UL)\
994 + 0x02020202UL;\ 994 + 0x02020202UL;\
995 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 995 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
996 + ((b&0xFCFCFCFCUL)>>2);\ 996 + ((b&0xFCFCFCFCUL)>>2);\
997 uint32_t l1,h1;\ 997 uint32_t l1,h1;\
998 \ 998 \
999 pixels+=line_size;\ 999 pixels+=line_size;\
1000 for(i=0; i<h; i+=2){\ 1000 for(i=0; i<h; i+=2){\
1001 uint32_t a= LD32(pixels );\ 1001 uint32_t a= AV_RN32(pixels );\
1002 uint32_t b= LD32(pixels+1);\ 1002 uint32_t b= AV_RN32(pixels+1);\
1003 l1= (a&0x03030303UL)\ 1003 l1= (a&0x03030303UL)\
1004 + (b&0x03030303UL);\ 1004 + (b&0x03030303UL);\
1005 h1= ((a&0xFCFCFCFCUL)>>2)\ 1005 h1= ((a&0xFCFCFCFCUL)>>2)\
1006 + ((b&0xFCFCFCFCUL)>>2);\ 1006 + ((b&0xFCFCFCFCUL)>>2);\
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1008 pixels+=line_size;\ 1008 pixels+=line_size;\
1009 block +=line_size;\ 1009 block +=line_size;\
1010 a= LD32(pixels );\ 1010 a= AV_RN32(pixels );\
1011 b= LD32(pixels+1);\ 1011 b= AV_RN32(pixels+1);\
1012 l0= (a&0x03030303UL)\ 1012 l0= (a&0x03030303UL)\
1013 + (b&0x03030303UL)\ 1013 + (b&0x03030303UL)\
1014 + 0x02020202UL;\ 1014 + 0x02020202UL;\
1015 h0= ((a&0xFCFCFCFCUL)>>2)\ 1015 h0= ((a&0xFCFCFCFCUL)>>2)\
1016 + ((b&0xFCFCFCFCUL)>>2);\ 1016 + ((b&0xFCFCFCFCUL)>>2);\
1023 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 1023 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1024 {\ 1024 {\
1025 int j;\ 1025 int j;\
1026 for(j=0; j<2; j++){\ 1026 for(j=0; j<2; j++){\
1027 int i;\ 1027 int i;\
1028 const uint32_t a= LD32(pixels );\ 1028 const uint32_t a= AV_RN32(pixels );\
1029 const uint32_t b= LD32(pixels+1);\ 1029 const uint32_t b= AV_RN32(pixels+1);\
1030 uint32_t l0= (a&0x03030303UL)\ 1030 uint32_t l0= (a&0x03030303UL)\
1031 + (b&0x03030303UL)\ 1031 + (b&0x03030303UL)\
1032 + 0x02020202UL;\ 1032 + 0x02020202UL;\
1033 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 1033 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1034 + ((b&0xFCFCFCFCUL)>>2);\ 1034 + ((b&0xFCFCFCFCUL)>>2);\
1035 uint32_t l1,h1;\ 1035 uint32_t l1,h1;\
1036 \ 1036 \
1037 pixels+=line_size;\ 1037 pixels+=line_size;\
1038 for(i=0; i<h; i+=2){\ 1038 for(i=0; i<h; i+=2){\
1039 uint32_t a= LD32(pixels );\ 1039 uint32_t a= AV_RN32(pixels );\
1040 uint32_t b= LD32(pixels+1);\ 1040 uint32_t b= AV_RN32(pixels+1);\
1041 l1= (a&0x03030303UL)\ 1041 l1= (a&0x03030303UL)\
1042 + (b&0x03030303UL);\ 1042 + (b&0x03030303UL);\
1043 h1= ((a&0xFCFCFCFCUL)>>2)\ 1043 h1= ((a&0xFCFCFCFCUL)>>2)\
1044 + ((b&0xFCFCFCFCUL)>>2);\ 1044 + ((b&0xFCFCFCFCUL)>>2);\
1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1046 pixels+=line_size;\ 1046 pixels+=line_size;\
1047 block +=line_size;\ 1047 block +=line_size;\
1048 a= LD32(pixels );\ 1048 a= AV_RN32(pixels );\
1049 b= LD32(pixels+1);\ 1049 b= AV_RN32(pixels+1);\
1050 l0= (a&0x03030303UL)\ 1050 l0= (a&0x03030303UL)\
1051 + (b&0x03030303UL)\ 1051 + (b&0x03030303UL)\
1052 + 0x02020202UL;\ 1052 + 0x02020202UL;\
1053 h0= ((a&0xFCFCFCFCUL)>>2)\ 1053 h0= ((a&0xFCFCFCFCUL)>>2)\
1054 + ((b&0xFCFCFCFCUL)>>2);\ 1054 + ((b&0xFCFCFCFCUL)>>2);\
1064 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 1064 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1065 {\ 1065 {\
1066 int j;\ 1066 int j;\
1067 for(j=0; j<2; j++){\ 1067 for(j=0; j<2; j++){\
1068 int i;\ 1068 int i;\
1069 const uint32_t a= LD32(pixels );\ 1069 const uint32_t a= AV_RN32(pixels );\
1070 const uint32_t b= LD32(pixels+1);\ 1070 const uint32_t b= AV_RN32(pixels+1);\
1071 uint32_t l0= (a&0x03030303UL)\ 1071 uint32_t l0= (a&0x03030303UL)\
1072 + (b&0x03030303UL)\ 1072 + (b&0x03030303UL)\
1073 + 0x01010101UL;\ 1073 + 0x01010101UL;\
1074 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 1074 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1075 + ((b&0xFCFCFCFCUL)>>2);\ 1075 + ((b&0xFCFCFCFCUL)>>2);\
1076 uint32_t l1,h1;\ 1076 uint32_t l1,h1;\
1077 \ 1077 \
1078 pixels+=line_size;\ 1078 pixels+=line_size;\
1079 for(i=0; i<h; i+=2){\ 1079 for(i=0; i<h; i+=2){\
1080 uint32_t a= LD32(pixels );\ 1080 uint32_t a= AV_RN32(pixels );\
1081 uint32_t b= LD32(pixels+1);\ 1081 uint32_t b= AV_RN32(pixels+1);\
1082 l1= (a&0x03030303UL)\ 1082 l1= (a&0x03030303UL)\
1083 + (b&0x03030303UL);\ 1083 + (b&0x03030303UL);\
1084 h1= ((a&0xFCFCFCFCUL)>>2)\ 1084 h1= ((a&0xFCFCFCFCUL)>>2)\
1085 + ((b&0xFCFCFCFCUL)>>2);\ 1085 + ((b&0xFCFCFCFCUL)>>2);\
1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1087 pixels+=line_size;\ 1087 pixels+=line_size;\
1088 block +=line_size;\ 1088 block +=line_size;\
1089 a= LD32(pixels );\ 1089 a= AV_RN32(pixels );\
1090 b= LD32(pixels+1);\ 1090 b= AV_RN32(pixels+1);\
1091 l0= (a&0x03030303UL)\ 1091 l0= (a&0x03030303UL)\
1092 + (b&0x03030303UL)\ 1092 + (b&0x03030303UL)\
1093 + 0x01010101UL;\ 1093 + 0x01010101UL;\
1094 h0= ((a&0xFCFCFCFCUL)>>2)\ 1094 h0= ((a&0xFCFCFCFCUL)>>2)\
1095 + ((b&0xFCFCFCFCUL)>>2);\ 1095 + ((b&0xFCFCFCFCUL)>>2);\