Mercurial > libavcodec.hg
comparison dsputil.c @ 5520:c16a59ef6a86 libavcodec
* renaming LD(16|32|64) -> AV_RN(16|32|64) and ST(16|32|64) -> AV_WN(16|32|64)
author | romansh |
---|---|
date | Thu, 09 Aug 2007 17:01:15 +0000 |
parents | 362aec4ef932 |
children | b0a566346fb1 |
comparison (legend: equal, deleted, inserted, replaced)
5519:b790f8c0ee24 | 5520:c16a59ef6a86 |
---|---|
606 #define PIXOP2(OPNAME, OP) \ | 606 #define PIXOP2(OPNAME, OP) \ |
607 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 607 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
608 {\ | 608 {\ |
609 int i;\ | 609 int i;\ |
610 for(i=0; i<h; i++){\ | 610 for(i=0; i<h; i++){\ |
611 OP(*((uint64_t*)block), LD64(pixels));\ | 611 OP(*((uint64_t*)block), AV_RN64(pixels));\ |
612 pixels+=line_size;\ | 612 pixels+=line_size;\ |
613 block +=line_size;\ | 613 block +=line_size;\ |
614 }\ | 614 }\ |
615 }\ | 615 }\ |
616 \ | 616 \ |
617 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 617 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
618 {\ | 618 {\ |
619 int i;\ | 619 int i;\ |
620 for(i=0; i<h; i++){\ | 620 for(i=0; i<h; i++){\ |
621 const uint64_t a= LD64(pixels );\ | 621 const uint64_t a= AV_RN64(pixels );\ |
622 const uint64_t b= LD64(pixels+1);\ | 622 const uint64_t b= AV_RN64(pixels+1);\ |
623 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | 623 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
624 pixels+=line_size;\ | 624 pixels+=line_size;\ |
625 block +=line_size;\ | 625 block +=line_size;\ |
626 }\ | 626 }\ |
627 }\ | 627 }\ |
628 \ | 628 \ |
629 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 629 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
630 {\ | 630 {\ |
631 int i;\ | 631 int i;\ |
632 for(i=0; i<h; i++){\ | 632 for(i=0; i<h; i++){\ |
633 const uint64_t a= LD64(pixels );\ | 633 const uint64_t a= AV_RN64(pixels );\ |
634 const uint64_t b= LD64(pixels+1);\ | 634 const uint64_t b= AV_RN64(pixels+1);\ |
635 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | 635 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
636 pixels+=line_size;\ | 636 pixels+=line_size;\ |
637 block +=line_size;\ | 637 block +=line_size;\ |
638 }\ | 638 }\ |
639 }\ | 639 }\ |
640 \ | 640 \ |
641 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 641 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
642 {\ | 642 {\ |
643 int i;\ | 643 int i;\ |
644 for(i=0; i<h; i++){\ | 644 for(i=0; i<h; i++){\ |
645 const uint64_t a= LD64(pixels );\ | 645 const uint64_t a= AV_RN64(pixels );\ |
646 const uint64_t b= LD64(pixels+line_size);\ | 646 const uint64_t b= AV_RN64(pixels+line_size);\ |
647 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | 647 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
648 pixels+=line_size;\ | 648 pixels+=line_size;\ |
649 block +=line_size;\ | 649 block +=line_size;\ |
650 }\ | 650 }\ |
651 }\ | 651 }\ |
652 \ | 652 \ |
653 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 653 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
654 {\ | 654 {\ |
655 int i;\ | 655 int i;\ |
656 for(i=0; i<h; i++){\ | 656 for(i=0; i<h; i++){\ |
657 const uint64_t a= LD64(pixels );\ | 657 const uint64_t a= AV_RN64(pixels );\ |
658 const uint64_t b= LD64(pixels+line_size);\ | 658 const uint64_t b= AV_RN64(pixels+line_size);\ |
659 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | 659 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ |
660 pixels+=line_size;\ | 660 pixels+=line_size;\ |
661 block +=line_size;\ | 661 block +=line_size;\ |
662 }\ | 662 }\ |
663 }\ | 663 }\ |
664 \ | 664 \ |
665 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 665 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
666 {\ | 666 {\ |
667 int i;\ | 667 int i;\ |
668 const uint64_t a= LD64(pixels );\ | 668 const uint64_t a= AV_RN64(pixels );\ |
669 const uint64_t b= LD64(pixels+1);\ | 669 const uint64_t b= AV_RN64(pixels+1);\ |
670 uint64_t l0= (a&0x0303030303030303ULL)\ | 670 uint64_t l0= (a&0x0303030303030303ULL)\ |
671 + (b&0x0303030303030303ULL)\ | 671 + (b&0x0303030303030303ULL)\ |
672 + 0x0202020202020202ULL;\ | 672 + 0x0202020202020202ULL;\ |
673 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | 673 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ |
674 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | 674 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ |
675 uint64_t l1,h1;\ | 675 uint64_t l1,h1;\ |
676 \ | 676 \ |
677 pixels+=line_size;\ | 677 pixels+=line_size;\ |
678 for(i=0; i<h; i+=2){\ | 678 for(i=0; i<h; i+=2){\ |
679 uint64_t a= LD64(pixels );\ | 679 uint64_t a= AV_RN64(pixels );\ |
680 uint64_t b= LD64(pixels+1);\ | 680 uint64_t b= AV_RN64(pixels+1);\ |
681 l1= (a&0x0303030303030303ULL)\ | 681 l1= (a&0x0303030303030303ULL)\ |
682 + (b&0x0303030303030303ULL);\ | 682 + (b&0x0303030303030303ULL);\ |
683 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | 683 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ |
684 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | 684 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ |
685 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | 685 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ |
686 pixels+=line_size;\ | 686 pixels+=line_size;\ |
687 block +=line_size;\ | 687 block +=line_size;\ |
688 a= LD64(pixels );\ | 688 a= AV_RN64(pixels );\ |
689 b= LD64(pixels+1);\ | 689 b= AV_RN64(pixels+1);\ |
690 l0= (a&0x0303030303030303ULL)\ | 690 l0= (a&0x0303030303030303ULL)\ |
691 + (b&0x0303030303030303ULL)\ | 691 + (b&0x0303030303030303ULL)\ |
692 + 0x0202020202020202ULL;\ | 692 + 0x0202020202020202ULL;\ |
693 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | 693 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ |
694 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | 694 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ |
699 }\ | 699 }\ |
700 \ | 700 \ |
701 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 701 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
702 {\ | 702 {\ |
703 int i;\ | 703 int i;\ |
704 const uint64_t a= LD64(pixels );\ | 704 const uint64_t a= AV_RN64(pixels );\ |
705 const uint64_t b= LD64(pixels+1);\ | 705 const uint64_t b= AV_RN64(pixels+1);\ |
706 uint64_t l0= (a&0x0303030303030303ULL)\ | 706 uint64_t l0= (a&0x0303030303030303ULL)\ |
707 + (b&0x0303030303030303ULL)\ | 707 + (b&0x0303030303030303ULL)\ |
708 + 0x0101010101010101ULL;\ | 708 + 0x0101010101010101ULL;\ |
709 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | 709 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ |
710 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | 710 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ |
711 uint64_t l1,h1;\ | 711 uint64_t l1,h1;\ |
712 \ | 712 \ |
713 pixels+=line_size;\ | 713 pixels+=line_size;\ |
714 for(i=0; i<h; i+=2){\ | 714 for(i=0; i<h; i+=2){\ |
715 uint64_t a= LD64(pixels );\ | 715 uint64_t a= AV_RN64(pixels );\ |
716 uint64_t b= LD64(pixels+1);\ | 716 uint64_t b= AV_RN64(pixels+1);\ |
717 l1= (a&0x0303030303030303ULL)\ | 717 l1= (a&0x0303030303030303ULL)\ |
718 + (b&0x0303030303030303ULL);\ | 718 + (b&0x0303030303030303ULL);\ |
719 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | 719 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ |
720 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | 720 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ |
721 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | 721 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ |
722 pixels+=line_size;\ | 722 pixels+=line_size;\ |
723 block +=line_size;\ | 723 block +=line_size;\ |
724 a= LD64(pixels );\ | 724 a= AV_RN64(pixels );\ |
725 b= LD64(pixels+1);\ | 725 b= AV_RN64(pixels+1);\ |
726 l0= (a&0x0303030303030303ULL)\ | 726 l0= (a&0x0303030303030303ULL)\ |
727 + (b&0x0303030303030303ULL)\ | 727 + (b&0x0303030303030303ULL)\ |
728 + 0x0101010101010101ULL;\ | 728 + 0x0101010101010101ULL;\ |
729 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | 729 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ |
730 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | 730 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ |
747 | 747 |
748 #define PIXOP2(OPNAME, OP) \ | 748 #define PIXOP2(OPNAME, OP) \ |
749 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 749 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
750 int i;\ | 750 int i;\ |
751 for(i=0; i<h; i++){\ | 751 for(i=0; i<h; i++){\ |
752 OP(*((uint16_t*)(block )), LD16(pixels ));\ | 752 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ |
753 pixels+=line_size;\ | 753 pixels+=line_size;\ |
754 block +=line_size;\ | 754 block +=line_size;\ |
755 }\ | 755 }\ |
756 }\ | 756 }\ |
757 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 757 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
758 int i;\ | 758 int i;\ |
759 for(i=0; i<h; i++){\ | 759 for(i=0; i<h; i++){\ |
760 OP(*((uint32_t*)(block )), LD32(pixels ));\ | 760 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
761 pixels+=line_size;\ | 761 pixels+=line_size;\ |
762 block +=line_size;\ | 762 block +=line_size;\ |
763 }\ | 763 }\ |
764 }\ | 764 }\ |
765 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 765 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
766 int i;\ | 766 int i;\ |
767 for(i=0; i<h; i++){\ | 767 for(i=0; i<h; i++){\ |
768 OP(*((uint32_t*)(block )), LD32(pixels ));\ | 768 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
769 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | 769 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ |
770 pixels+=line_size;\ | 770 pixels+=line_size;\ |
771 block +=line_size;\ | 771 block +=line_size;\ |
772 }\ | 772 }\ |
773 }\ | 773 }\ |
774 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 774 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
778 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 778 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
779 int src_stride1, int src_stride2, int h){\ | 779 int src_stride1, int src_stride2, int h){\ |
780 int i;\ | 780 int i;\ |
781 for(i=0; i<h; i++){\ | 781 for(i=0; i<h; i++){\ |
782 uint32_t a,b;\ | 782 uint32_t a,b;\ |
783 a= LD32(&src1[i*src_stride1 ]);\ | 783 a= AV_RN32(&src1[i*src_stride1 ]);\ |
784 b= LD32(&src2[i*src_stride2 ]);\ | 784 b= AV_RN32(&src2[i*src_stride2 ]);\ |
785 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ | 785 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
786 a= LD32(&src1[i*src_stride1+4]);\ | 786 a= AV_RN32(&src1[i*src_stride1+4]);\ |
787 b= LD32(&src2[i*src_stride2+4]);\ | 787 b= AV_RN32(&src2[i*src_stride2+4]);\ |
788 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ | 788 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
789 }\ | 789 }\ |
790 }\ | 790 }\ |
791 \ | 791 \ |
792 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 792 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
793 int src_stride1, int src_stride2, int h){\ | 793 int src_stride1, int src_stride2, int h){\ |
794 int i;\ | 794 int i;\ |
795 for(i=0; i<h; i++){\ | 795 for(i=0; i<h; i++){\ |
796 uint32_t a,b;\ | 796 uint32_t a,b;\ |
797 a= LD32(&src1[i*src_stride1 ]);\ | 797 a= AV_RN32(&src1[i*src_stride1 ]);\ |
798 b= LD32(&src2[i*src_stride2 ]);\ | 798 b= AV_RN32(&src2[i*src_stride2 ]);\ |
799 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | 799 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
800 a= LD32(&src1[i*src_stride1+4]);\ | 800 a= AV_RN32(&src1[i*src_stride1+4]);\ |
801 b= LD32(&src2[i*src_stride2+4]);\ | 801 b= AV_RN32(&src2[i*src_stride2+4]);\ |
802 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ | 802 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
803 }\ | 803 }\ |
804 }\ | 804 }\ |
805 \ | 805 \ |
806 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 806 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
807 int src_stride1, int src_stride2, int h){\ | 807 int src_stride1, int src_stride2, int h){\ |
808 int i;\ | 808 int i;\ |
809 for(i=0; i<h; i++){\ | 809 for(i=0; i<h; i++){\ |
810 uint32_t a,b;\ | 810 uint32_t a,b;\ |
811 a= LD32(&src1[i*src_stride1 ]);\ | 811 a= AV_RN32(&src1[i*src_stride1 ]);\ |
812 b= LD32(&src2[i*src_stride2 ]);\ | 812 b= AV_RN32(&src2[i*src_stride2 ]);\ |
813 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | 813 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
814 }\ | 814 }\ |
815 }\ | 815 }\ |
816 \ | 816 \ |
817 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 817 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
818 int src_stride1, int src_stride2, int h){\ | 818 int src_stride1, int src_stride2, int h){\ |
819 int i;\ | 819 int i;\ |
820 for(i=0; i<h; i++){\ | 820 for(i=0; i<h; i++){\ |
821 uint32_t a,b;\ | 821 uint32_t a,b;\ |
822 a= LD16(&src1[i*src_stride1 ]);\ | 822 a= AV_RN16(&src1[i*src_stride1 ]);\ |
823 b= LD16(&src2[i*src_stride2 ]);\ | 823 b= AV_RN16(&src2[i*src_stride2 ]);\ |
824 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | 824 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
825 }\ | 825 }\ |
826 }\ | 826 }\ |
827 \ | 827 \ |
828 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | 828 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
856 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | 856 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
857 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 857 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
858 int i;\ | 858 int i;\ |
859 for(i=0; i<h; i++){\ | 859 for(i=0; i<h; i++){\ |
860 uint32_t a, b, c, d, l0, l1, h0, h1;\ | 860 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
861 a= LD32(&src1[i*src_stride1]);\ | 861 a= AV_RN32(&src1[i*src_stride1]);\ |
862 b= LD32(&src2[i*src_stride2]);\ | 862 b= AV_RN32(&src2[i*src_stride2]);\ |
863 c= LD32(&src3[i*src_stride3]);\ | 863 c= AV_RN32(&src3[i*src_stride3]);\ |
864 d= LD32(&src4[i*src_stride4]);\ | 864 d= AV_RN32(&src4[i*src_stride4]);\ |
865 l0= (a&0x03030303UL)\ | 865 l0= (a&0x03030303UL)\ |
866 + (b&0x03030303UL)\ | 866 + (b&0x03030303UL)\ |
867 + 0x02020202UL;\ | 867 + 0x02020202UL;\ |
868 h0= ((a&0xFCFCFCFCUL)>>2)\ | 868 h0= ((a&0xFCFCFCFCUL)>>2)\ |
869 + ((b&0xFCFCFCFCUL)>>2);\ | 869 + ((b&0xFCFCFCFCUL)>>2);\ |
870 l1= (c&0x03030303UL)\ | 870 l1= (c&0x03030303UL)\ |
871 + (d&0x03030303UL);\ | 871 + (d&0x03030303UL);\ |
872 h1= ((c&0xFCFCFCFCUL)>>2)\ | 872 h1= ((c&0xFCFCFCFCUL)>>2)\ |
873 + ((d&0xFCFCFCFCUL)>>2);\ | 873 + ((d&0xFCFCFCFCUL)>>2);\ |
874 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | 874 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
875 a= LD32(&src1[i*src_stride1+4]);\ | 875 a= AV_RN32(&src1[i*src_stride1+4]);\ |
876 b= LD32(&src2[i*src_stride2+4]);\ | 876 b= AV_RN32(&src2[i*src_stride2+4]);\ |
877 c= LD32(&src3[i*src_stride3+4]);\ | 877 c= AV_RN32(&src3[i*src_stride3+4]);\ |
878 d= LD32(&src4[i*src_stride4+4]);\ | 878 d= AV_RN32(&src4[i*src_stride4+4]);\ |
879 l0= (a&0x03030303UL)\ | 879 l0= (a&0x03030303UL)\ |
880 + (b&0x03030303UL)\ | 880 + (b&0x03030303UL)\ |
881 + 0x02020202UL;\ | 881 + 0x02020202UL;\ |
882 h0= ((a&0xFCFCFCFCUL)>>2)\ | 882 h0= ((a&0xFCFCFCFCUL)>>2)\ |
883 + ((b&0xFCFCFCFCUL)>>2);\ | 883 + ((b&0xFCFCFCFCUL)>>2);\ |
908 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | 908 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
909 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | 909 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
910 int i;\ | 910 int i;\ |
911 for(i=0; i<h; i++){\ | 911 for(i=0; i<h; i++){\ |
912 uint32_t a, b, c, d, l0, l1, h0, h1;\ | 912 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
913 a= LD32(&src1[i*src_stride1]);\ | 913 a= AV_RN32(&src1[i*src_stride1]);\ |
914 b= LD32(&src2[i*src_stride2]);\ | 914 b= AV_RN32(&src2[i*src_stride2]);\ |
915 c= LD32(&src3[i*src_stride3]);\ | 915 c= AV_RN32(&src3[i*src_stride3]);\ |
916 d= LD32(&src4[i*src_stride4]);\ | 916 d= AV_RN32(&src4[i*src_stride4]);\ |
917 l0= (a&0x03030303UL)\ | 917 l0= (a&0x03030303UL)\ |
918 + (b&0x03030303UL)\ | 918 + (b&0x03030303UL)\ |
919 + 0x01010101UL;\ | 919 + 0x01010101UL;\ |
920 h0= ((a&0xFCFCFCFCUL)>>2)\ | 920 h0= ((a&0xFCFCFCFCUL)>>2)\ |
921 + ((b&0xFCFCFCFCUL)>>2);\ | 921 + ((b&0xFCFCFCFCUL)>>2);\ |
922 l1= (c&0x03030303UL)\ | 922 l1= (c&0x03030303UL)\ |
923 + (d&0x03030303UL);\ | 923 + (d&0x03030303UL);\ |
924 h1= ((c&0xFCFCFCFCUL)>>2)\ | 924 h1= ((c&0xFCFCFCFCUL)>>2)\ |
925 + ((d&0xFCFCFCFCUL)>>2);\ | 925 + ((d&0xFCFCFCFCUL)>>2);\ |
926 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | 926 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
927 a= LD32(&src1[i*src_stride1+4]);\ | 927 a= AV_RN32(&src1[i*src_stride1+4]);\ |
928 b= LD32(&src2[i*src_stride2+4]);\ | 928 b= AV_RN32(&src2[i*src_stride2+4]);\ |
929 c= LD32(&src3[i*src_stride3+4]);\ | 929 c= AV_RN32(&src3[i*src_stride3+4]);\ |
930 d= LD32(&src4[i*src_stride4+4]);\ | 930 d= AV_RN32(&src4[i*src_stride4+4]);\ |
931 l0= (a&0x03030303UL)\ | 931 l0= (a&0x03030303UL)\ |
932 + (b&0x03030303UL)\ | 932 + (b&0x03030303UL)\ |
933 + 0x01010101UL;\ | 933 + 0x01010101UL;\ |
934 h0= ((a&0xFCFCFCFCUL)>>2)\ | 934 h0= ((a&0xFCFCFCFCUL)>>2)\ |
935 + ((b&0xFCFCFCFCUL)>>2);\ | 935 + ((b&0xFCFCFCFCUL)>>2);\ |
985 }\ | 985 }\ |
986 \ | 986 \ |
987 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 987 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
988 {\ | 988 {\ |
989 int i;\ | 989 int i;\ |
990 const uint32_t a= LD32(pixels );\ | 990 const uint32_t a= AV_RN32(pixels );\ |
991 const uint32_t b= LD32(pixels+1);\ | 991 const uint32_t b= AV_RN32(pixels+1);\ |
992 uint32_t l0= (a&0x03030303UL)\ | 992 uint32_t l0= (a&0x03030303UL)\ |
993 + (b&0x03030303UL)\ | 993 + (b&0x03030303UL)\ |
994 + 0x02020202UL;\ | 994 + 0x02020202UL;\ |
995 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | 995 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
996 + ((b&0xFCFCFCFCUL)>>2);\ | 996 + ((b&0xFCFCFCFCUL)>>2);\ |
997 uint32_t l1,h1;\ | 997 uint32_t l1,h1;\ |
998 \ | 998 \ |
999 pixels+=line_size;\ | 999 pixels+=line_size;\ |
1000 for(i=0; i<h; i+=2){\ | 1000 for(i=0; i<h; i+=2){\ |
1001 uint32_t a= LD32(pixels );\ | 1001 uint32_t a= AV_RN32(pixels );\ |
1002 uint32_t b= LD32(pixels+1);\ | 1002 uint32_t b= AV_RN32(pixels+1);\ |
1003 l1= (a&0x03030303UL)\ | 1003 l1= (a&0x03030303UL)\ |
1004 + (b&0x03030303UL);\ | 1004 + (b&0x03030303UL);\ |
1005 h1= ((a&0xFCFCFCFCUL)>>2)\ | 1005 h1= ((a&0xFCFCFCFCUL)>>2)\ |
1006 + ((b&0xFCFCFCFCUL)>>2);\ | 1006 + ((b&0xFCFCFCFCUL)>>2);\ |
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | 1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
1008 pixels+=line_size;\ | 1008 pixels+=line_size;\ |
1009 block +=line_size;\ | 1009 block +=line_size;\ |
1010 a= LD32(pixels );\ | 1010 a= AV_RN32(pixels );\ |
1011 b= LD32(pixels+1);\ | 1011 b= AV_RN32(pixels+1);\ |
1012 l0= (a&0x03030303UL)\ | 1012 l0= (a&0x03030303UL)\ |
1013 + (b&0x03030303UL)\ | 1013 + (b&0x03030303UL)\ |
1014 + 0x02020202UL;\ | 1014 + 0x02020202UL;\ |
1015 h0= ((a&0xFCFCFCFCUL)>>2)\ | 1015 h0= ((a&0xFCFCFCFCUL)>>2)\ |
1016 + ((b&0xFCFCFCFCUL)>>2);\ | 1016 + ((b&0xFCFCFCFCUL)>>2);\ |
1023 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 1023 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
1024 {\ | 1024 {\ |
1025 int j;\ | 1025 int j;\ |
1026 for(j=0; j<2; j++){\ | 1026 for(j=0; j<2; j++){\ |
1027 int i;\ | 1027 int i;\ |
1028 const uint32_t a= LD32(pixels );\ | 1028 const uint32_t a= AV_RN32(pixels );\ |
1029 const uint32_t b= LD32(pixels+1);\ | 1029 const uint32_t b= AV_RN32(pixels+1);\ |
1030 uint32_t l0= (a&0x03030303UL)\ | 1030 uint32_t l0= (a&0x03030303UL)\ |
1031 + (b&0x03030303UL)\ | 1031 + (b&0x03030303UL)\ |
1032 + 0x02020202UL;\ | 1032 + 0x02020202UL;\ |
1033 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | 1033 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
1034 + ((b&0xFCFCFCFCUL)>>2);\ | 1034 + ((b&0xFCFCFCFCUL)>>2);\ |
1035 uint32_t l1,h1;\ | 1035 uint32_t l1,h1;\ |
1036 \ | 1036 \ |
1037 pixels+=line_size;\ | 1037 pixels+=line_size;\ |
1038 for(i=0; i<h; i+=2){\ | 1038 for(i=0; i<h; i+=2){\ |
1039 uint32_t a= LD32(pixels );\ | 1039 uint32_t a= AV_RN32(pixels );\ |
1040 uint32_t b= LD32(pixels+1);\ | 1040 uint32_t b= AV_RN32(pixels+1);\ |
1041 l1= (a&0x03030303UL)\ | 1041 l1= (a&0x03030303UL)\ |
1042 + (b&0x03030303UL);\ | 1042 + (b&0x03030303UL);\ |
1043 h1= ((a&0xFCFCFCFCUL)>>2)\ | 1043 h1= ((a&0xFCFCFCFCUL)>>2)\ |
1044 + ((b&0xFCFCFCFCUL)>>2);\ | 1044 + ((b&0xFCFCFCFCUL)>>2);\ |
1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | 1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
1046 pixels+=line_size;\ | 1046 pixels+=line_size;\ |
1047 block +=line_size;\ | 1047 block +=line_size;\ |
1048 a= LD32(pixels );\ | 1048 a= AV_RN32(pixels );\ |
1049 b= LD32(pixels+1);\ | 1049 b= AV_RN32(pixels+1);\ |
1050 l0= (a&0x03030303UL)\ | 1050 l0= (a&0x03030303UL)\ |
1051 + (b&0x03030303UL)\ | 1051 + (b&0x03030303UL)\ |
1052 + 0x02020202UL;\ | 1052 + 0x02020202UL;\ |
1053 h0= ((a&0xFCFCFCFCUL)>>2)\ | 1053 h0= ((a&0xFCFCFCFCUL)>>2)\ |
1054 + ((b&0xFCFCFCFCUL)>>2);\ | 1054 + ((b&0xFCFCFCFCUL)>>2);\ |
1064 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | 1064 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
1065 {\ | 1065 {\ |
1066 int j;\ | 1066 int j;\ |
1067 for(j=0; j<2; j++){\ | 1067 for(j=0; j<2; j++){\ |
1068 int i;\ | 1068 int i;\ |
1069 const uint32_t a= LD32(pixels );\ | 1069 const uint32_t a= AV_RN32(pixels );\ |
1070 const uint32_t b= LD32(pixels+1);\ | 1070 const uint32_t b= AV_RN32(pixels+1);\ |
1071 uint32_t l0= (a&0x03030303UL)\ | 1071 uint32_t l0= (a&0x03030303UL)\ |
1072 + (b&0x03030303UL)\ | 1072 + (b&0x03030303UL)\ |
1073 + 0x01010101UL;\ | 1073 + 0x01010101UL;\ |
1074 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | 1074 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
1075 + ((b&0xFCFCFCFCUL)>>2);\ | 1075 + ((b&0xFCFCFCFCUL)>>2);\ |
1076 uint32_t l1,h1;\ | 1076 uint32_t l1,h1;\ |
1077 \ | 1077 \ |
1078 pixels+=line_size;\ | 1078 pixels+=line_size;\ |
1079 for(i=0; i<h; i+=2){\ | 1079 for(i=0; i<h; i+=2){\ |
1080 uint32_t a= LD32(pixels );\ | 1080 uint32_t a= AV_RN32(pixels );\ |
1081 uint32_t b= LD32(pixels+1);\ | 1081 uint32_t b= AV_RN32(pixels+1);\ |
1082 l1= (a&0x03030303UL)\ | 1082 l1= (a&0x03030303UL)\ |
1083 + (b&0x03030303UL);\ | 1083 + (b&0x03030303UL);\ |
1084 h1= ((a&0xFCFCFCFCUL)>>2)\ | 1084 h1= ((a&0xFCFCFCFCUL)>>2)\ |
1085 + ((b&0xFCFCFCFCUL)>>2);\ | 1085 + ((b&0xFCFCFCFCUL)>>2);\ |
1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | 1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
1087 pixels+=line_size;\ | 1087 pixels+=line_size;\ |
1088 block +=line_size;\ | 1088 block +=line_size;\ |
1089 a= LD32(pixels );\ | 1089 a= AV_RN32(pixels );\ |
1090 b= LD32(pixels+1);\ | 1090 b= AV_RN32(pixels+1);\ |
1091 l0= (a&0x03030303UL)\ | 1091 l0= (a&0x03030303UL)\ |
1092 + (b&0x03030303UL)\ | 1092 + (b&0x03030303UL)\ |
1093 + 0x01010101UL;\ | 1093 + 0x01010101UL;\ |
1094 h0= ((a&0xFCFCFCFCUL)>>2)\ | 1094 h0= ((a&0xFCFCFCFCUL)>>2)\ |
1095 + ((b&0xFCFCFCFCUL)>>2);\ | 1095 + ((b&0xFCFCFCFCUL)>>2);\ |