Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 3973:b28edd190fc0 libavcodec
removing ALTIVEC_USE_REFERENCE_C_CODE, since it has no use anymore
author | lu_zero |
---|---|
date | Mon, 09 Oct 2006 18:29:46 +0000 |
parents | c86c7a54ba92 |
children | c867ae28d4de |
comparison
equal
deleted
inserted
replaced
3972:f5f1c9af095d | 3973:b28edd190fc0 |
---|---|
616 block += 8; | 616 block += 8; |
617 } | 617 } |
618 } | 618 } |
619 | 619 |
620 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { | 620 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { |
621 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
622 int i; | |
623 for(i=0; i+7<w; i++){ | |
624 dst[i+0] += src[i+0]; | |
625 dst[i+1] += src[i+1]; | |
626 dst[i+2] += src[i+2]; | |
627 dst[i+3] += src[i+3]; | |
628 dst[i+4] += src[i+4]; | |
629 dst[i+5] += src[i+5]; | |
630 dst[i+6] += src[i+6]; | |
631 dst[i+7] += src[i+7]; | |
632 } | |
633 for(; i<w; i++) | |
634 dst[i+0] += src[i+0]; | |
635 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
636 register int i; | 621 register int i; |
637 register vector unsigned char vdst, vsrc; | 622 register vector unsigned char vdst, vsrc; |
638 | 623 |
639 /* dst and src are 16 bytes-aligned (guaranteed) */ | 624 /* dst and src are 16 bytes-aligned (guaranteed) */ |
640 for(i = 0 ; (i + 15) < w ; i+=16) | 625 for(i = 0 ; (i + 15) < w ; i+=16) |
647 /* if w is not a multiple of 16 */ | 632 /* if w is not a multiple of 16 */ |
648 for (; (i < w) ; i++) | 633 for (; (i < w) ; i++) |
649 { | 634 { |
650 dst[i] = src[i]; | 635 dst[i] = src[i]; |
651 } | 636 } |
652 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
653 } | 637 } |
654 | 638 |
655 /* next one assumes that ((line_size % 16) == 0) */ | 639 /* next one assumes that ((line_size % 16) == 0) */ |
656 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 640 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
657 { | 641 { |
658 POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); | 642 POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); |
659 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
660 int i; | |
661 | |
662 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); | |
663 | |
664 for(i=0; i<h; i++) { | |
665 *((uint32_t*)(block)) = LD32(pixels); | |
666 *((uint32_t*)(block+4)) = LD32(pixels+4); | |
667 *((uint32_t*)(block+8)) = LD32(pixels+8); | |
668 *((uint32_t*)(block+12)) = LD32(pixels+12); | |
669 pixels+=line_size; | |
670 block +=line_size; | |
671 } | |
672 | |
673 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); | |
674 | |
675 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
676 register vector unsigned char pixelsv1, pixelsv2; | 643 register vector unsigned char pixelsv1, pixelsv2; |
677 register vector unsigned char pixelsv1B, pixelsv2B; | 644 register vector unsigned char pixelsv1B, pixelsv2B; |
678 register vector unsigned char pixelsv1C, pixelsv2C; | 645 register vector unsigned char pixelsv1C, pixelsv2C; |
679 register vector unsigned char pixelsv1D, pixelsv2D; | 646 register vector unsigned char pixelsv1D, pixelsv2D; |
680 | 647 |
720 pixels+=line_size_4; | 687 pixels+=line_size_4; |
721 block +=line_size_4; | 688 block +=line_size_4; |
722 } | 689 } |
723 #endif | 690 #endif |
724 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); | 691 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); |
725 | |
726 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
727 } | 692 } |
728 | 693 |
729 /* next one assumes that ((line_size % 16) == 0) */ | 694 /* next one assumes that ((line_size % 16) == 0) */ |
730 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | 695 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) |
731 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 696 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
732 { | 697 { |
733 POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); | 698 POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); |
734 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
735 int i; | |
736 | |
737 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); | |
738 | |
739 for(i=0; i<h; i++) { | |
740 op_avg(*((uint32_t*)(block)),LD32(pixels)); | |
741 op_avg(*((uint32_t*)(block+4)),LD32(pixels+4)); | |
742 op_avg(*((uint32_t*)(block+8)),LD32(pixels+8)); | |
743 op_avg(*((uint32_t*)(block+12)),LD32(pixels+12)); | |
744 pixels+=line_size; | |
745 block +=line_size; | |
746 } | |
747 | |
748 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | |
749 | |
750 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
751 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | 699 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; |
752 register vector unsigned char perm = vec_lvsl(0, pixels); | 700 register vector unsigned char perm = vec_lvsl(0, pixels); |
753 int i; | 701 int i; |
754 | 702 |
755 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); | 703 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); |
764 pixels+=line_size; | 712 pixels+=line_size; |
765 block +=line_size; | 713 block +=line_size; |
766 } | 714 } |
767 | 715 |
768 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); | 716 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); |
769 | |
770 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
771 } | 717 } |
772 | 718 |
773 /* next one assumes that ((line_size % 8) == 0) */ | 719 /* next one assumes that ((line_size % 8) == 0) */ |
774 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | 720 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) |
775 { | 721 { |
776 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); | 722 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); |
777 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
778 int i; | |
779 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); | |
780 for (i = 0; i < h; i++) { | |
781 *((uint32_t *) (block)) = | |
782 (((*((uint32_t *) (block))) | | |
783 ((((const struct unaligned_32 *) (pixels))->l))) - | |
784 ((((*((uint32_t *) (block))) ^ | |
785 ((((const struct unaligned_32 *) (pixels))-> | |
786 l))) & 0xFEFEFEFEUL) >> 1)); | |
787 *((uint32_t *) (block + 4)) = | |
788 (((*((uint32_t *) (block + 4))) | | |
789 ((((const struct unaligned_32 *) (pixels + 4))->l))) - | |
790 ((((*((uint32_t *) (block + 4))) ^ | |
791 ((((const struct unaligned_32 *) (pixels + | |
792 4))-> | |
793 l))) & 0xFEFEFEFEUL) >> 1)); | |
794 pixels += line_size; | |
795 block += line_size; | |
796 } | |
797 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); | |
798 | |
799 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
800 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | 723 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; |
801 int i; | 724 int i; |
802 | 725 |
803 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); | 726 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); |
804 | 727 |
830 pixels += line_size; | 753 pixels += line_size; |
831 block += line_size; | 754 block += line_size; |
832 } | 755 } |
833 | 756 |
834 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); | 757 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); |
835 | |
836 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
837 } | 758 } |
838 | 759 |
839 /* next one assumes that ((line_size % 8) == 0) */ | 760 /* next one assumes that ((line_size % 8) == 0) */ |
840 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 761 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
841 { | 762 { |
842 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); | 763 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); |
843 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
844 int j; | |
845 POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); | |
846 for (j = 0; j < 2; j++) { | |
847 int i; | |
848 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
849 const uint32_t b = | |
850 (((const struct unaligned_32 *) (pixels + 1))->l); | |
851 uint32_t l0 = | |
852 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; | |
853 uint32_t h0 = | |
854 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
855 uint32_t l1, h1; | |
856 pixels += line_size; | |
857 for (i = 0; i < h; i += 2) { | |
858 uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
859 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
860 l1 = (a & 0x03030303UL) + (b & 0x03030303UL); | |
861 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
862 *((uint32_t *) block) = | |
863 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
864 pixels += line_size; | |
865 block += line_size; | |
866 a = (((const struct unaligned_32 *) (pixels))->l); | |
867 b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
868 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; | |
869 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
870 *((uint32_t *) block) = | |
871 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
872 pixels += line_size; | |
873 block += line_size; | |
874 } pixels += 4 - line_size * (h + 1); | |
875 block += 4 - line_size * h; | |
876 } | |
877 | |
878 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | |
879 | |
880 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
881 register int i; | 764 register int i; |
882 register vector unsigned char | 765 register vector unsigned char |
883 pixelsv1, pixelsv2, | 766 pixelsv1, pixelsv2, |
884 pixelsavg; | 767 pixelsavg; |
885 register vector unsigned char | 768 register vector unsigned char |
946 block += line_size; | 829 block += line_size; |
947 pixels += line_size; | 830 pixels += line_size; |
948 } | 831 } |
949 | 832 |
950 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | 833 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); |
951 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
952 } | 834 } |
953 | 835 |
954 /* next one assumes that ((line_size % 8) == 0) */ | 836 /* next one assumes that ((line_size % 8) == 0) */ |
955 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 837 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
956 { | 838 { |
957 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); | 839 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); |
958 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
959 int j; | |
960 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |
961 for (j = 0; j < 2; j++) { | |
962 int i; | |
963 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
964 const uint32_t b = | |
965 (((const struct unaligned_32 *) (pixels + 1))->l); | |
966 uint32_t l0 = | |
967 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; | |
968 uint32_t h0 = | |
969 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
970 uint32_t l1, h1; | |
971 pixels += line_size; | |
972 for (i = 0; i < h; i += 2) { | |
973 uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
974 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
975 l1 = (a & 0x03030303UL) + (b & 0x03030303UL); | |
976 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
977 *((uint32_t *) block) = | |
978 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
979 pixels += line_size; | |
980 block += line_size; | |
981 a = (((const struct unaligned_32 *) (pixels))->l); | |
982 b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
983 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; | |
984 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
985 *((uint32_t *) block) = | |
986 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
987 pixels += line_size; | |
988 block += line_size; | |
989 } pixels += 4 - line_size * (h + 1); | |
990 block += 4 - line_size * h; | |
991 } | |
992 | |
993 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | |
994 | |
995 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
996 register int i; | 840 register int i; |
997 register vector unsigned char | 841 register vector unsigned char |
998 pixelsv1, pixelsv2, | 842 pixelsv1, pixelsv2, |
999 pixelsavg; | 843 pixelsavg; |
1000 register vector unsigned char | 844 register vector unsigned char |
1062 block += line_size; | 906 block += line_size; |
1063 pixels += line_size; | 907 pixels += line_size; |
1064 } | 908 } |
1065 | 909 |
1066 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | 910 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); |
1067 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1068 } | 911 } |
1069 | 912 |
1070 /* next one assumes that ((line_size % 16) == 0) */ | 913 /* next one assumes that ((line_size % 16) == 0) */ |
1071 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | 914 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) |
1072 { | 915 { |
1073 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); | 916 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); |
1074 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
1075 int j; | |
1076 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); | |
1077 for (j = 0; j < 4; j++) { | |
1078 int i; | |
1079 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
1080 const uint32_t b = | |
1081 (((const struct unaligned_32 *) (pixels + 1))->l); | |
1082 uint32_t l0 = | |
1083 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; | |
1084 uint32_t h0 = | |
1085 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1086 uint32_t l1, h1; | |
1087 pixels += line_size; | |
1088 for (i = 0; i < h; i += 2) { | |
1089 uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
1090 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1091 l1 = (a & 0x03030303UL) + (b & 0x03030303UL); | |
1092 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1093 *((uint32_t *) block) = | |
1094 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
1095 pixels += line_size; | |
1096 block += line_size; | |
1097 a = (((const struct unaligned_32 *) (pixels))->l); | |
1098 b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1099 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; | |
1100 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1101 *((uint32_t *) block) = | |
1102 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
1103 pixels += line_size; | |
1104 block += line_size; | |
1105 } pixels += 4 - line_size * (h + 1); | |
1106 block += 4 - line_size * h; | |
1107 } | |
1108 | |
1109 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | |
1110 | |
1111 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1112 register int i; | 917 register int i; |
1113 register vector unsigned char | 918 register vector unsigned char |
1114 pixelsv1, pixelsv2, pixelsv3, pixelsv4; | 919 pixelsv1, pixelsv2, pixelsv3, pixelsv4; |
1115 register vector unsigned char | 920 register vector unsigned char |
1116 blockv, temp1, temp2; | 921 blockv, temp1, temp2; |
1183 block += line_size; | 988 block += line_size; |
1184 pixels += line_size; | 989 pixels += line_size; |
1185 } | 990 } |
1186 | 991 |
1187 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | 992 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); |
1188 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1189 } | 993 } |
1190 | 994 |
1191 /* next one assumes that ((line_size % 16) == 0) */ | 995 /* next one assumes that ((line_size % 16) == 0) */ |
1192 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | 996 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) |
1193 { | 997 { |
1194 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); | 998 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1195 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
1196 int j; | |
1197 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |
1198 for (j = 0; j < 4; j++) { | |
1199 int i; | |
1200 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
1201 const uint32_t b = | |
1202 (((const struct unaligned_32 *) (pixels + 1))->l); | |
1203 uint32_t l0 = | |
1204 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; | |
1205 uint32_t h0 = | |
1206 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1207 uint32_t l1, h1; | |
1208 pixels += line_size; | |
1209 for (i = 0; i < h; i += 2) { | |
1210 uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
1211 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1212 l1 = (a & 0x03030303UL) + (b & 0x03030303UL); | |
1213 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1214 *((uint32_t *) block) = | |
1215 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
1216 pixels += line_size; | |
1217 block += line_size; | |
1218 a = (((const struct unaligned_32 *) (pixels))->l); | |
1219 b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1220 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; | |
1221 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1222 *((uint32_t *) block) = | |
1223 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); | |
1224 pixels += line_size; | |
1225 block += line_size; | |
1226 } pixels += 4 - line_size * (h + 1); | |
1227 block += 4 - line_size * h; | |
1228 } | |
1229 | |
1230 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | |
1231 | |
1232 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1233 register int i; | 999 register int i; |
1234 register vector unsigned char | 1000 register vector unsigned char |
1235 pixelsv1, pixelsv2, pixelsv3, pixelsv4; | 1001 pixelsv1, pixelsv2, pixelsv3, pixelsv4; |
1236 register vector unsigned char | 1002 register vector unsigned char |
1237 blockv, temp1, temp2; | 1003 blockv, temp1, temp2; |
1305 block += line_size; | 1071 block += line_size; |
1306 pixels += line_size; | 1072 pixels += line_size; |
1307 } | 1073 } |
1308 | 1074 |
1309 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1075 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1310 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1311 } | 1076 } |
1312 | 1077 |
1313 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ | 1078 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ |
1314 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); | 1079 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); |
1315 int sum; | 1080 int sum; |
1725 | 1490 |
1726 /* next one assumes that ((line_size % 8) == 0) */ | 1491 /* next one assumes that ((line_size % 8) == 0) */ |
1727 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 1492 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
1728 { | 1493 { |
1729 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); | 1494 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); |
1730 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
1731 | |
1732 int j; | |
1733 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); | |
1734 for (j = 0; j < 2; j++) { | |
1735 int i; | |
1736 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
1737 const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1738 uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; | |
1739 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1740 uint32_t l1, h1; | |
1741 pixels += line_size; | |
1742 for (i = 0; i < h; i += 2) { | |
1743 uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | |
1744 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1745 l1 = (a & 0x03030303UL) + (b & 0x03030303UL); | |
1746 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1747 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); | |
1748 pixels += line_size; | |
1749 block += line_size; | |
1750 a = (((const struct unaligned_32 *) (pixels))->l); | |
1751 b = (((const struct unaligned_32 *) (pixels + 1))->l); | |
1752 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; | |
1753 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); | |
1754 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); | |
1755 pixels += line_size; | |
1756 block += line_size; | |
1757 } pixels += 4 - line_size * (h + 1); | |
1758 block += 4 - line_size * h; | |
1759 } | |
1760 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); | |
1761 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1762 register int i; | 1495 register int i; |
1763 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; | 1496 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; |
1764 register vector unsigned char blockv, temp1, temp2, blocktemp; | 1497 register vector unsigned char blockv, temp1, temp2, blocktemp; |
1765 register vector unsigned short pixelssum1, pixelssum2, temp3; | 1498 register vector unsigned short pixelssum1, pixelssum2, temp3; |
1766 | 1499 |
1819 block += line_size; | 1552 block += line_size; |
1820 pixels += line_size; | 1553 pixels += line_size; |
1821 } | 1554 } |
1822 | 1555 |
1823 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); | 1556 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); |
1824 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
1825 } | 1557 } |
1826 | 1558 |
1827 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) | 1559 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) |
1828 { | 1560 { |
1829 c->pix_abs[0][1] = sad16_x2_altivec; | 1561 c->pix_abs[0][1] = sad16_x2_altivec; |