comparison ppc/dsputil_altivec.c @ 1352:e8ff4783f188 libavcodec

1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around. 2) make the PPC perf stuff a configure option. 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4. Patch by Romain Dolbeau <dolbeau at irisa dot fr>.
author michaelni
date Wed, 09 Jul 2003 20:18:13 +0000
parents 09b8fe0f0139
children dea5b2946999
comparison
equal deleted inserted replaced
1351:0fc1a6f8ca94 1352:e8ff4783f188
653 } 653 }
654 654
655 /* next one assumes that ((line_size % 16) == 0) */ 655 /* next one assumes that ((line_size % 16) == 0) */
656 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 656 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
657 { 657 {
658 POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); 658 POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1);
659 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 659 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
660 int i; 660 int i;
661 661
662 POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); 662 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
663 663
664 for(i=0; i<h; i++) { 664 for(i=0; i<h; i++) {
665 *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); 665 *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
666 *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); 666 *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l);
667 *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); 667 *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l);
668 *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); 668 *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l);
669 pixels+=line_size; 669 pixels+=line_size;
670 block +=line_size; 670 block +=line_size;
671 } 671 }
672 672
673 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); 673 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
674 674
675 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 675 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
676 register vector unsigned char pixelsv1, pixelsv2; 676 register vector unsigned char pixelsv1, pixelsv2;
677 register vector unsigned char pixelsv1B, pixelsv2B;
678 register vector unsigned char pixelsv1C, pixelsv2C;
679 register vector unsigned char pixelsv1D, pixelsv2D;
680
677 register vector unsigned char perm = vec_lvsl(0, pixels); 681 register vector unsigned char perm = vec_lvsl(0, pixels);
678 int i; 682 int i;
679 683 register int line_size_2 = line_size << 1;
680 POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); 684 register int line_size_3 = line_size + line_size_2;
681 685 register int line_size_4 = line_size << 2;
686
687 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1);
688 // hand-unrolling the loop by 4 gains about 15%
689 // minimum execution time goes from 74 to 60 cycles
690 // it's faster than -funroll-loops, but using
691 // -funroll-loops w/ this is bad - 74 cycles again.
692 // all this is on a 7450, tuning for the 7450
693 #if 0
682 for(i=0; i<h; i++) { 694 for(i=0; i<h; i++) {
683 pixelsv1 = vec_ld(0, (unsigned char*)pixels); 695 pixelsv1 = vec_ld(0, (unsigned char*)pixels);
684 pixelsv2 = vec_ld(16, (unsigned char*)pixels); 696 pixelsv2 = vec_ld(16, (unsigned char*)pixels);
685 vec_st(vec_perm(pixelsv1, pixelsv2, perm), 697 vec_st(vec_perm(pixelsv1, pixelsv2, perm),
686 0, (unsigned char*)block); 698 0, (unsigned char*)block);
687 pixels+=line_size; 699 pixels+=line_size;
688 block +=line_size; 700 block +=line_size;
689 } 701 }
690 702 #else
691 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); 703 for(i=0; i<h; i+=4) {
704 pixelsv1 = vec_ld(0, (unsigned char*)pixels);
705 pixelsv2 = vec_ld(16, (unsigned char*)pixels);
706 pixelsv1B = vec_ld(line_size, (unsigned char*)pixels);
707 pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels);
708 pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels);
709 pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels);
710 pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels);
711 pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels);
712 vec_st(vec_perm(pixelsv1, pixelsv2, perm),
713 0, (unsigned char*)block);
714 vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
715 line_size, (unsigned char*)block);
716 vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
717 line_size_2, (unsigned char*)block);
718 vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
719 line_size_3, (unsigned char*)block);
720 pixels+=line_size_4;
721 block +=line_size_4;
722 }
723 #endif
724 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1);
692 725
693 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 726 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
694 } 727 }
695 728
696 /* next one assumes that ((line_size % 16) == 0) */ 729 /* next one assumes that ((line_size % 16) == 0) */
697 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) 730 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
698 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 731 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
699 { 732 {
700 POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); 733 POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1);
701 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 734 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
702 int i; 735 int i;
703 736
704 POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); 737 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
705 738
706 for(i=0; i<h; i++) { 739 for(i=0; i<h; i++) {
707 op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); 740 op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
708 op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); 741 op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l));
709 op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); 742 op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l));
710 op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); 743 op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l));
711 pixels+=line_size; 744 pixels+=line_size;
712 block +=line_size; 745 block +=line_size;
713 } 746 }
714 747
715 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); 748 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
716 749
717 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 750 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
718 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; 751 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
719 register vector unsigned char perm = vec_lvsl(0, pixels); 752 register vector unsigned char perm = vec_lvsl(0, pixels);
720 int i; 753 int i;
721 754
722 POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); 755 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1);
723 756
724 for(i=0; i<h; i++) { 757 for(i=0; i<h; i++) {
725 pixelsv1 = vec_ld(0, (unsigned char*)pixels); 758 pixelsv1 = vec_ld(0, (unsigned char*)pixels);
726 pixelsv2 = vec_ld(16, (unsigned char*)pixels); 759 pixelsv2 = vec_ld(16, (unsigned char*)pixels);
727 blockv = vec_ld(0, block); 760 blockv = vec_ld(0, block);
730 vec_st(blockv, 0, (unsigned char*)block); 763 vec_st(blockv, 0, (unsigned char*)block);
731 pixels+=line_size; 764 pixels+=line_size;
732 block +=line_size; 765 block +=line_size;
733 } 766 }
734 767
735 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); 768 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1);
736 769
737 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 770 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
738 } 771 }
739 772
740 /* next one assumes that ((line_size % 8) == 0) */ 773 /* next one assumes that ((line_size % 8) == 0) */
741 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 774 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
742 { 775 {
743 POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); 776 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1);
744 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 777 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
745 int i; 778 int i;
746 POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); 779 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
747 for (i = 0; i < h; i++) { 780 for (i = 0; i < h; i++) {
748 *((uint32_t *) (block)) = 781 *((uint32_t *) (block)) =
749 (((*((uint32_t *) (block))) | 782 (((*((uint32_t *) (block))) |
750 ((((const struct unaligned_32 *) (pixels))->l))) - 783 ((((const struct unaligned_32 *) (pixels))->l))) -
751 ((((*((uint32_t *) (block))) ^ 784 ((((*((uint32_t *) (block))) ^
759 4))-> 792 4))->
760 l))) & 0xFEFEFEFEUL) >> 1)); 793 l))) & 0xFEFEFEFEUL) >> 1));
761 pixels += line_size; 794 pixels += line_size;
762 block += line_size; 795 block += line_size;
763 } 796 }
764 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); 797 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
765 798
766 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 799 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
767 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; 800 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
768 int i; 801 int i;
769 802
770 POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); 803 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
771 804
772 for (i = 0; i < h; i++) { 805 for (i = 0; i < h; i++) {
773 /* 806 /*
774 block is 8 bytes-aligned, so we're either in the 807 block is 8 bytes-aligned, so we're either in the
775 left block (16 bytes-aligned) or in the right block (not) 808 left block (16 bytes-aligned) or in the right block (not)
796 829
797 pixels += line_size; 830 pixels += line_size;
798 block += line_size; 831 block += line_size;
799 } 832 }
800 833
801 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); 834 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
802 835
803 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 836 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
804 } 837 }
805 838
806 /* next one assumes that ((line_size % 8) == 0) */ 839 /* next one assumes that ((line_size % 8) == 0) */
807 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 840 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
808 { 841 {
809 POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); 842 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
810 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 843 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
811 int j; 844 int j;
812 POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); 845 POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
813 for (j = 0; j < 2; j++) { 846 for (j = 0; j < 2; j++) {
814 int i; 847 int i;
815 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); 848 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
816 const uint32_t b = 849 const uint32_t b =
817 (((const struct unaligned_32 *) (pixels + 1))->l); 850 (((const struct unaligned_32 *) (pixels + 1))->l);
840 block += line_size; 873 block += line_size;
841 } pixels += 4 - line_size * (h + 1); 874 } pixels += 4 - line_size * (h + 1);
842 block += 4 - line_size * h; 875 block += 4 - line_size * h;
843 } 876 }
844 877
845 POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); 878 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
846 879
847 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 880 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
848 register int i; 881 register int i;
849 register vector unsigned char 882 register vector unsigned char
850 pixelsv1, pixelsv2, 883 pixelsv1, pixelsv2,
871 pixelsv2 = vec_mergeh(vczero, pixelsv2); 904 pixelsv2 = vec_mergeh(vczero, pixelsv2);
872 pixelssum1 = vec_add((vector unsigned short)pixelsv1, 905 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
873 (vector unsigned short)pixelsv2); 906 (vector unsigned short)pixelsv2);
874 pixelssum1 = vec_add(pixelssum1, vctwo); 907 pixelssum1 = vec_add(pixelssum1, vctwo);
875 908
876 POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); 909 POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
877 for (i = 0; i < h ; i++) { 910 for (i = 0; i < h ; i++) {
878 int rightside = ((unsigned long)block & 0x0000000F); 911 int rightside = ((unsigned long)block & 0x0000000F);
879 blockv = vec_ld(0, block); 912 blockv = vec_ld(0, block);
880 913
881 temp1 = vec_ld(line_size, pixels); 914 temp1 = vec_ld(line_size, pixels);
912 945
913 block += line_size; 946 block += line_size;
914 pixels += line_size; 947 pixels += line_size;
915 } 948 }
916 949
917 POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); 950 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
918 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 951 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
919 } 952 }
920 953
921 /* next one assumes that ((line_size % 8) == 0) */ 954 /* next one assumes that ((line_size % 8) == 0) */
922 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 955 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
923 { 956 {
924 POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); 957 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
925 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 958 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
926 int j; 959 int j;
927 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 960 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
928 for (j = 0; j < 2; j++) { 961 for (j = 0; j < 2; j++) {
929 int i; 962 int i;
930 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); 963 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
931 const uint32_t b = 964 const uint32_t b =
932 (((const struct unaligned_32 *) (pixels + 1))->l); 965 (((const struct unaligned_32 *) (pixels + 1))->l);
955 block += line_size; 988 block += line_size;
956 } pixels += 4 - line_size * (h + 1); 989 } pixels += 4 - line_size * (h + 1);
957 block += 4 - line_size * h; 990 block += 4 - line_size * h;
958 } 991 }
959 992
960 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 993 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
961 994
962 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 995 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
963 register int i; 996 register int i;
964 register vector unsigned char 997 register vector unsigned char
965 pixelsv1, pixelsv2, 998 pixelsv1, pixelsv2,
987 pixelsv2 = vec_mergeh(vczero, pixelsv2); 1020 pixelsv2 = vec_mergeh(vczero, pixelsv2);
988 pixelssum1 = vec_add((vector unsigned short)pixelsv1, 1021 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
989 (vector unsigned short)pixelsv2); 1022 (vector unsigned short)pixelsv2);
990 pixelssum1 = vec_add(pixelssum1, vcone); 1023 pixelssum1 = vec_add(pixelssum1, vcone);
991 1024
992 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 1025 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
993 for (i = 0; i < h ; i++) { 1026 for (i = 0; i < h ; i++) {
994 int rightside = ((unsigned long)block & 0x0000000F); 1027 int rightside = ((unsigned long)block & 0x0000000F);
995 blockv = vec_ld(0, block); 1028 blockv = vec_ld(0, block);
996 1029
997 temp1 = vec_ld(line_size, pixels); 1030 temp1 = vec_ld(line_size, pixels);
1028 1061
1029 block += line_size; 1062 block += line_size;
1030 pixels += line_size; 1063 pixels += line_size;
1031 } 1064 }
1032 1065
1033 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 1066 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
1034 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1067 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1035 } 1068 }
1036 1069
1037 /* next one assumes that ((line_size % 16) == 0) */ 1070 /* next one assumes that ((line_size % 16) == 0) */
1038 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 1071 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
1039 { 1072 {
1040 POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1); 1073 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
1041 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 1074 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1042 int j; 1075 int j;
1043 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); 1076 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1044 for (j = 0; j < 4; j++) { 1077 for (j = 0; j < 4; j++) {
1045 int i; 1078 int i;
1046 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); 1079 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1047 const uint32_t b = 1080 const uint32_t b =
1048 (((const struct unaligned_32 *) (pixels + 1))->l); 1081 (((const struct unaligned_32 *) (pixels + 1))->l);
1071 block += line_size; 1104 block += line_size;
1072 } pixels += 4 - line_size * (h + 1); 1105 } pixels += 4 - line_size * (h + 1);
1073 block += 4 - line_size * h; 1106 block += 4 - line_size * h;
1074 } 1107 }
1075 1108
1076 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); 1109 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
1077 1110
1078 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 1111 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1079 register int i; 1112 register int i;
1080 register vector unsigned char 1113 register vector unsigned char
1081 pixelsv1, pixelsv2, pixelsv3, pixelsv4; 1114 pixelsv1, pixelsv2, pixelsv3, pixelsv4;
1085 pixelssum1, pixelssum2, temp3, 1118 pixelssum1, pixelssum2, temp3,
1086 pixelssum3, pixelssum4, temp4; 1119 pixelssum3, pixelssum4, temp4;
1087 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 1120 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
1088 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 1121 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
1089 1122
1090 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); 1123 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1091 1124
1092 temp1 = vec_ld(0, pixels); 1125 temp1 = vec_ld(0, pixels);
1093 temp2 = vec_ld(16, pixels); 1126 temp2 = vec_ld(16, pixels);
1094 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 1127 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1095 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 1128 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1149 1182
1150 block += line_size; 1183 block += line_size;
1151 pixels += line_size; 1184 pixels += line_size;
1152 } 1185 }
1153 1186
1154 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); 1187 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
1155 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1188 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1156 } 1189 }
1157 1190
1158 /* next one assumes that ((line_size % 16) == 0) */ 1191 /* next one assumes that ((line_size % 16) == 0) */
1159 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) 1192 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
1160 { 1193 {
1161 POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); 1194 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
1162 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 1195 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1163 int j; 1196 int j;
1164 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1197 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1165 for (j = 0; j < 4; j++) { 1198 for (j = 0; j < 4; j++) {
1166 int i; 1199 int i;
1167 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); 1200 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1168 const uint32_t b = 1201 const uint32_t b =
1169 (((const struct unaligned_32 *) (pixels + 1))->l); 1202 (((const struct unaligned_32 *) (pixels + 1))->l);
1192 block += line_size; 1225 block += line_size;
1193 } pixels += 4 - line_size * (h + 1); 1226 } pixels += 4 - line_size * (h + 1);
1194 block += 4 - line_size * h; 1227 block += 4 - line_size * h;
1195 } 1228 }
1196 1229
1197 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1230 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1198 1231
1199 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 1232 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1200 register int i; 1233 register int i;
1201 register vector unsigned char 1234 register vector unsigned char
1202 pixelsv1, pixelsv2, pixelsv3, pixelsv4; 1235 pixelsv1, pixelsv2, pixelsv3, pixelsv4;
1207 pixelssum3, pixelssum4, temp4; 1240 pixelssum3, pixelssum4, temp4;
1208 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 1241 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
1209 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); 1242 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
1210 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 1243 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
1211 1244
1212 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1245 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1213 1246
1214 temp1 = vec_ld(0, pixels); 1247 temp1 = vec_ld(0, pixels);
1215 temp2 = vec_ld(16, pixels); 1248 temp2 = vec_ld(16, pixels);
1216 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 1249 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1217 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 1250 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1271 1304
1272 block += line_size; 1305 block += line_size;
1273 pixels += line_size; 1306 pixels += line_size;
1274 } 1307 }
1275 1308
1276 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 1309 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1277 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 1310 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1278 } 1311 }
1279 1312
1280 int has_altivec(void) 1313 int has_altivec(void)
1281 { 1314 {