comparison ppc/dsputil_altivec.c @ 3973:b28edd190fc0 libavcodec

removing ALTIVEC_USE_REFERENCE_C_CODE, since it has no use anymore
author lu_zero
date Mon, 09 Oct 2006 18:29:46 +0000
parents c86c7a54ba92
children c867ae28d4de
3972:f5f1c9af095d 3973:b28edd190fc0
616 block += 8; 616 block += 8;
617 } 617 }
618 } 618 }
619 619
620 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { 620 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
621 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
622 int i;
623 for(i=0; i+7<w; i++){
624 dst[i+0] += src[i+0];
625 dst[i+1] += src[i+1];
626 dst[i+2] += src[i+2];
627 dst[i+3] += src[i+3];
628 dst[i+4] += src[i+4];
629 dst[i+5] += src[i+5];
630 dst[i+6] += src[i+6];
631 dst[i+7] += src[i+7];
632 }
633 for(; i<w; i++)
634 dst[i+0] += src[i+0];
635 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
636 register int i; 621 register int i;
637 register vector unsigned char vdst, vsrc; 622 register vector unsigned char vdst, vsrc;
638 623
639 /* dst and src are 16 bytes-aligned (guaranteed) */ 624 /* dst and src are 16 bytes-aligned (guaranteed) */
640 for(i = 0 ; (i + 15) < w ; i+=16) 625 for(i = 0 ; (i + 15) < w ; i+=16)
647 /* if w is not a multiple of 16 */ 632 /* if w is not a multiple of 16 */
648 for (; (i < w) ; i++) 633 for (; (i < w) ; i++)
649 { 634 {
650 dst[i] = src[i]; 635 dst[i] = src[i];
651 } 636 }
652 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
653 } 637 }
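
The main vector loop body is elided in this hunk. Given the 16-byte alignment the comment guarantees, the usual AltiVec shape is an aligned load of both operands, a per-byte vec_add, and an aligned store back, followed by a scalar tail. The sketch below only illustrates that pattern: the function name add_bytes_sketch is ours, and the body is not quoted from the changeset.

    #include <stdint.h>
    #include <altivec.h>

    /* Illustrative sketch, assuming dst and src are 16-byte aligned as the
     * comment in the function above guarantees; not the committed loop body. */
    static void add_bytes_sketch(uint8_t *dst, uint8_t *src, int w)
    {
        int i;
        vector unsigned char vdst, vsrc;

        for (i = 0; (i + 15) < w; i += 16) {
            vdst = vec_ld(i, (unsigned char *) dst);  /* aligned 16-byte load      */
            vsrc = vec_ld(i, (unsigned char *) src);  /* aligned 16-byte load      */
            vdst = vec_add(vsrc, vdst);               /* modular per-byte addition */
            vec_st(vdst, i, (unsigned char *) dst);   /* aligned 16-byte store     */
        }
        for (; i < w; i++)                            /* tail when w % 16 != 0     */
            dst[i] += src[i];
    }
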
654 638
655 /* next one assumes that ((line_size % 16) == 0) */ 639 /* next one assumes that ((line_size % 16) == 0) */
656 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 640 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
657 { 641 {
658 POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); 642 POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
659 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
660 int i;
661
662 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
663
664 for(i=0; i<h; i++) {
665 *((uint32_t*)(block)) = LD32(pixels);
666 *((uint32_t*)(block+4)) = LD32(pixels+4);
667 *((uint32_t*)(block+8)) = LD32(pixels+8);
668 *((uint32_t*)(block+12)) = LD32(pixels+12);
669 pixels+=line_size;
670 block +=line_size;
671 }
672
673 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
674
675 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
676 register vector unsigned char pixelsv1, pixelsv2; 643 register vector unsigned char pixelsv1, pixelsv2;
677 register vector unsigned char pixelsv1B, pixelsv2B; 644 register vector unsigned char pixelsv1B, pixelsv2B;
678 register vector unsigned char pixelsv1C, pixelsv2C; 645 register vector unsigned char pixelsv1C, pixelsv2C;
679 register vector unsigned char pixelsv1D, pixelsv2D; 646 register vector unsigned char pixelsv1D, pixelsv2D;
680 647
720 pixels+=line_size_4; 687 pixels+=line_size_4;
721 block +=line_size_4; 688 block +=line_size_4;
722 } 689 }
723 #endif 690 #endif
724 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); 691 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
725
726 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
727 } 692 }
728 693
729 /* next one assumes that ((line_size % 16) == 0) */ 694 /* next one assumes that ((line_size % 16) == 0) */
730 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) 695 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
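
op_avg is the standard SWAR rounded average: since a + b = (a|b) + (a&b) = (a^b) + 2*(a&b), subtracting the halved XOR from a|b yields (a+b+1)>>1, and masking the XOR with 0xFEFEFEFE before the shift keeps each byte lane's bits from leaking into its neighbour. A minimal standalone check of that identity follows; the helper name avg_swar and the test driver are illustrative only, not part of the changeset.

    #include <assert.h>
    #include <stdint.h>

    /* Same expression as op_avg, wrapped in a function for testing. */
    static uint32_t avg_swar(uint32_t a, uint32_t b)
    {
        return (a | b) - (((a ^ b) & 0xFEFEFEFEUL) >> 1);
    }

    int main(void)
    {
        unsigned a, b;
        for (a = 0; a < 256; a++)
            for (b = 0; b < 256; b++)
                /* replicate a and b into all four byte lanes; each lane must
                 * equal the scalar rounded average (a + b + 1) >> 1 */
                assert((avg_swar(a * 0x01010101U, b * 0x01010101U) & 0xFF)
                       == ((a + b + 1) >> 1));
        return 0;
    }

The same identity is what the removed reference rows further down expand inline, and what their rnd_avg32 calls wrap.
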
731 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 696 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
732 { 697 {
733 POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); 698 POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
734 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
735 int i;
736
737 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
738
739 for(i=0; i<h; i++) {
740 op_avg(*((uint32_t*)(block)),LD32(pixels));
741 op_avg(*((uint32_t*)(block+4)),LD32(pixels+4));
742 op_avg(*((uint32_t*)(block+8)),LD32(pixels+8));
743 op_avg(*((uint32_t*)(block+12)),LD32(pixels+12));
744 pixels+=line_size;
745 block +=line_size;
746 }
747
748 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
749
750 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
751 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; 699 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
752 register vector unsigned char perm = vec_lvsl(0, pixels); 700 register vector unsigned char perm = vec_lvsl(0, pixels);
753 int i; 701 int i;
754 702
755 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); 703 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
764 pixels+=line_size; 712 pixels+=line_size;
765 block +=line_size; 713 block +=line_size;
766 } 714 }
767 715
768 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); 716 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
769
770 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
771 } 717 }
772 718
773 /* next one assumes that ((line_size % 8) == 0) */ 719 /* next one assumes that ((line_size % 8) == 0) */
774 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 720 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
775 { 721 {
776 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); 722 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
777 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
778 int i;
779 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
780 for (i = 0; i < h; i++) {
781 *((uint32_t *) (block)) =
782 (((*((uint32_t *) (block))) |
783 ((((const struct unaligned_32 *) (pixels))->l))) -
784 ((((*((uint32_t *) (block))) ^
785 ((((const struct unaligned_32 *) (pixels))->
786 l))) & 0xFEFEFEFEUL) >> 1));
787 *((uint32_t *) (block + 4)) =
788 (((*((uint32_t *) (block + 4))) |
789 ((((const struct unaligned_32 *) (pixels + 4))->l))) -
790 ((((*((uint32_t *) (block + 4))) ^
791 ((((const struct unaligned_32 *) (pixels +
792 4))->
793 l))) & 0xFEFEFEFEUL) >> 1));
794 pixels += line_size;
795 block += line_size;
796 }
797 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
798
799 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
800 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; 723 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
801 int i; 724 int i;
802 725
803 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); 726 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
804 727
830 pixels += line_size; 753 pixels += line_size;
831 block += line_size; 754 block += line_size;
832 } 755 }
833 756
834 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); 757 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
835
836 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
837 } 758 }
838 759
839 /* next one assumes that ((line_size % 8) == 0) */ 760 /* next one assumes that ((line_size % 8) == 0) */
840 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 761 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
841 { 762 {
842 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); 763 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
843 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
844 int j;
845 POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
846 for (j = 0; j < 2; j++) {
847 int i;
848 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
849 const uint32_t b =
850 (((const struct unaligned_32 *) (pixels + 1))->l);
851 uint32_t l0 =
852 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
853 uint32_t h0 =
854 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
855 uint32_t l1, h1;
856 pixels += line_size;
857 for (i = 0; i < h; i += 2) {
858 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
859 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
860 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
861 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
862 *((uint32_t *) block) =
863 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
864 pixels += line_size;
865 block += line_size;
866 a = (((const struct unaligned_32 *) (pixels))->l);
867 b = (((const struct unaligned_32 *) (pixels + 1))->l);
868 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
869 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
870 *((uint32_t *) block) =
871 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
872 pixels += line_size;
873 block += line_size;
874 } pixels += 4 - line_size * (h + 1);
875 block += 4 - line_size * h;
876 }
877
878 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
879
880 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
881 register int i; 764 register int i;
882 register vector unsigned char 765 register vector unsigned char
883 pixelsv1, pixelsv2, 766 pixelsv1, pixelsv2,
884 pixelsavg; 767 pixelsavg;
885 register vector unsigned char 768 register vector unsigned char
946 block += line_size; 829 block += line_size;
947 pixels += line_size; 830 pixels += line_size;
948 } 831 }
949 832
950 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); 833 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
951 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
952 } 834 }
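
Every xy2 reference block removed in this change relies on the same SWAR split: each output byte is (A+B+C+D+2)>>2, computed by summing the low two bits of the four samples (the l terms, which also carry the per-byte rounding constant 0x02) separately from the top six bits pre-shifted right by two (the h terms), so no per-byte sum can spill into the neighbouring lane; the no_rnd variants below differ only in adding 0x01 per byte instead of 0x02. A scalar, single-byte check of that decomposition (avg4_split and the driver are illustrative names only):

    #include <assert.h>

    /* One byte of the l/h decomposition used by the removed xy2 reference code. */
    static unsigned avg4_split(unsigned a, unsigned b, unsigned c, unsigned d)
    {
        unsigned l = (a & 3) + (b & 3) + (c & 3) + (d & 3) + 2; /* low 2 bits + rounding */
        unsigned h = (a >> 2) + (b >> 2) + (c >> 2) + (d >> 2); /* high 6 bits, already >>2 */
        return h + (l >> 2);   /* 4*h + l == a+b+c+d+2, and l <= 14 so l>>2 <= 3 */
    }

    int main(void)
    {
        unsigned a, b, c, d;
        for (a = 0; a < 256; a += 3)
            for (b = 0; b < 256; b += 5)
                for (c = 0; c < 256; c += 7)
                    for (d = 0; d < 256; d += 11)
                        assert(avg4_split(a, b, c, d) == ((a + b + c + d + 2) >> 2));
        return 0;
    }

In the packed 32-bit form above, the same sums run on four bytes at once via the 0x03030303, 0xFCFCFCFC and 0x0F0F0F0F masks.
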
953 835
954 /* next one assumes that ((line_size % 8) == 0) */ 836 /* next one assumes that ((line_size % 8) == 0) */
955 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 837 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
956 { 838 {
957 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); 839 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
958 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
959 int j;
960 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
961 for (j = 0; j < 2; j++) {
962 int i;
963 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
964 const uint32_t b =
965 (((const struct unaligned_32 *) (pixels + 1))->l);
966 uint32_t l0 =
967 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
968 uint32_t h0 =
969 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
970 uint32_t l1, h1;
971 pixels += line_size;
972 for (i = 0; i < h; i += 2) {
973 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
974 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
975 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
976 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
977 *((uint32_t *) block) =
978 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
979 pixels += line_size;
980 block += line_size;
981 a = (((const struct unaligned_32 *) (pixels))->l);
982 b = (((const struct unaligned_32 *) (pixels + 1))->l);
983 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
984 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
985 *((uint32_t *) block) =
986 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
987 pixels += line_size;
988 block += line_size;
989 } pixels += 4 - line_size * (h + 1);
990 block += 4 - line_size * h;
991 }
992
993 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
994
995 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
996 register int i; 840 register int i;
997 register vector unsigned char 841 register vector unsigned char
998 pixelsv1, pixelsv2, 842 pixelsv1, pixelsv2,
999 pixelsavg; 843 pixelsavg;
1000 register vector unsigned char 844 register vector unsigned char
1062 block += line_size; 906 block += line_size;
1063 pixels += line_size; 907 pixels += line_size;
1064 } 908 }
1065 909
1066 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 910 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
1067 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1068 } 911 }
1069 912
1070 /* next one assumes that ((line_size % 16) == 0) */ 913 /* next one assumes that ((line_size % 16) == 0) */
1071 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 914 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
1072 { 915 {
1073 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); 916 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
1074 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1075 int j;
1076 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1077 for (j = 0; j < 4; j++) {
1078 int i;
1079 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1080 const uint32_t b =
1081 (((const struct unaligned_32 *) (pixels + 1))->l);
1082 uint32_t l0 =
1083 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1084 uint32_t h0 =
1085 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1086 uint32_t l1, h1;
1087 pixels += line_size;
1088 for (i = 0; i < h; i += 2) {
1089 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1090 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1091 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1092 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1093 *((uint32_t *) block) =
1094 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1095 pixels += line_size;
1096 block += line_size;
1097 a = (((const struct unaligned_32 *) (pixels))->l);
1098 b = (((const struct unaligned_32 *) (pixels + 1))->l);
1099 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1100 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1101 *((uint32_t *) block) =
1102 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1103 pixels += line_size;
1104 block += line_size;
1105 } pixels += 4 - line_size * (h + 1);
1106 block += 4 - line_size * h;
1107 }
1108
1109 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
1110
1111 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1112 register int i; 917 register int i;
1113 register vector unsigned char 918 register vector unsigned char
1114 pixelsv1, pixelsv2, pixelsv3, pixelsv4; 919 pixelsv1, pixelsv2, pixelsv3, pixelsv4;
1115 register vector unsigned char 920 register vector unsigned char
1116 blockv, temp1, temp2; 921 blockv, temp1, temp2;
1183 block += line_size; 988 block += line_size;
1184 pixels += line_size; 989 pixels += line_size;
1185 } 990 }
1186 991
1187 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); 992 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
1188 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1189 } 993 }
1190 994
1191 /* next one assumes that ((line_size % 16) == 0) */ 995 /* next one assumes that ((line_size % 16) == 0) */
1192 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 996 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
1193 { 997 {
1194 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); 998 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
1195 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1196 int j;
1197 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1198 for (j = 0; j < 4; j++) {
1199 int i;
1200 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1201 const uint32_t b =
1202 (((const struct unaligned_32 *) (pixels + 1))->l);
1203 uint32_t l0 =
1204 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
1205 uint32_t h0 =
1206 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1207 uint32_t l1, h1;
1208 pixels += line_size;
1209 for (i = 0; i < h; i += 2) {
1210 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1211 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1212 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1213 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1214 *((uint32_t *) block) =
1215 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1216 pixels += line_size;
1217 block += line_size;
1218 a = (((const struct unaligned_32 *) (pixels))->l);
1219 b = (((const struct unaligned_32 *) (pixels + 1))->l);
1220 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
1221 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1222 *((uint32_t *) block) =
1223 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1224 pixels += line_size;
1225 block += line_size;
1226 } pixels += 4 - line_size * (h + 1);
1227 block += 4 - line_size * h;
1228 }
1229
1230 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1231
1232 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1233 register int i; 999 register int i;
1234 register vector unsigned char 1000 register vector unsigned char
1235 pixelsv1, pixelsv2, pixelsv3, pixelsv4; 1001 pixelsv1, pixelsv2, pixelsv3, pixelsv4;
1236 register vector unsigned char 1002 register vector unsigned char
1237 blockv, temp1, temp2; 1003 blockv, temp1, temp2;
1305 block += line_size; 1071 block += line_size;
1306 pixels += line_size; 1072 pixels += line_size;
1307 } 1073 }
1308 1074
1309 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1075 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1310 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1311 } 1076 }
1312 1077
1313 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 1078 int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1314 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); 1079 POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
1315 int sum; 1080 int sum;
1725 1490
1726 /* next one assumes that ((line_size % 8) == 0) */ 1491 /* next one assumes that ((line_size % 8) == 0) */
1727 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 1492 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
1728 { 1493 {
1729 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1); 1494 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
1730 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1731
1732 int j;
1733 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
1734 for (j = 0; j < 2; j++) {
1735 int i;
1736 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1737 const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1738 uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1739 uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1740 uint32_t l1, h1;
1741 pixels += line_size;
1742 for (i = 0; i < h; i += 2) {
1743 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1744 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1745 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1746 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1747 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1748 pixels += line_size;
1749 block += line_size;
1750 a = (((const struct unaligned_32 *) (pixels))->l);
1751 b = (((const struct unaligned_32 *) (pixels + 1))->l);
1752 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1753 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1754 *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1755 pixels += line_size;
1756 block += line_size;
1757 } pixels += 4 - line_size * (h + 1);
1758 block += 4 - line_size * h;
1759 }
1760 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1761 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1762 register int i; 1495 register int i;
1763 register vector unsigned char pixelsv1, pixelsv2, pixelsavg; 1496 register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
1764 register vector unsigned char blockv, temp1, temp2, blocktemp; 1497 register vector unsigned char blockv, temp1, temp2, blocktemp;
1765 register vector unsigned short pixelssum1, pixelssum2, temp3; 1498 register vector unsigned short pixelssum1, pixelssum2, temp3;
1766 1499
1819 block += line_size; 1552 block += line_size;
1820 pixels += line_size; 1553 pixels += line_size;
1821 } 1554 }
1822 1555
1823 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1); 1556 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1824 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1825 } 1557 }
1826 1558
1827 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) 1559 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
1828 { 1560 {
1829 c->pix_abs[0][1] = sad16_x2_altivec; 1561 c->pix_abs[0][1] = sad16_x2_altivec;