Mercurial > libavcodec.hg
comparison ppc/dsputil_altivec.c @ 1352:e8ff4783f188 libavcodec
1) remove TBL support from the PPC performance code. It's much more useful to use the
PMCs, and with Apple's CHUD it's fairly easy too. There is no reason to keep useless
code around.
2) make the PPC perf stuff a configure option
3) make put_pixels16_altivec a bit faster by unrolling the loop by 4
patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michaelni |
---|---|
date | Wed, 09 Jul 2003 20:18:13 +0000 |
parents | 09b8fe0f0139 |
children | dea5b2946999 |
comparison
equal
deleted
inserted
replaced
1351:0fc1a6f8ca94 | 1352:e8ff4783f188 |
---|---|
653 } | 653 } |
654 | 654 |
655 /* next one assumes that ((line_size % 16) == 0) */ | 655 /* next one assumes that ((line_size % 16) == 0) */ |
656 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 656 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
657 { | 657 { |
658 POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); | 658 POWERPC_PERF_DECLARE(altivec_put_pixels16_num, 1); |
659 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 659 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
660 int i; | 660 int i; |
661 | 661 |
662 POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); | 662 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); |
663 | 663 |
664 for(i=0; i<h; i++) { | 664 for(i=0; i<h; i++) { |
665 *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); | 665 *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); |
666 *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); | 666 *((uint32_t*)(block+4)) = (((const struct unaligned_32 *) (pixels+4))->l); |
667 *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); | 667 *((uint32_t*)(block+8)) = (((const struct unaligned_32 *) (pixels+8))->l); |
668 *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); | 668 *((uint32_t*)(block+12)) = (((const struct unaligned_32 *) (pixels+12))->l); |
669 pixels+=line_size; | 669 pixels+=line_size; |
670 block +=line_size; | 670 block +=line_size; |
671 } | 671 } |
672 | 672 |
673 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); | 673 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); |
674 | 674 |
675 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 675 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
676 register vector unsigned char pixelsv1, pixelsv2; | 676 register vector unsigned char pixelsv1, pixelsv2; |
677 register vector unsigned char pixelsv1B, pixelsv2B; | |
678 register vector unsigned char pixelsv1C, pixelsv2C; | |
679 register vector unsigned char pixelsv1D, pixelsv2D; | |
680 | |
677 register vector unsigned char perm = vec_lvsl(0, pixels); | 681 register vector unsigned char perm = vec_lvsl(0, pixels); |
678 int i; | 682 int i; |
679 | 683 register int line_size_2 = line_size << 1; |
680 POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); | 684 register int line_size_3 = line_size + line_size_2; |
681 | 685 register int line_size_4 = line_size << 2; |
686 | |
687 POWERPC_PERF_START_COUNT(altivec_put_pixels16_num, 1); | |
688 // hand-unrolling the loop by 4 gains about 15% | |
689 // minimum execution time goes from 74 to 60 cycles | |
690 // it's faster than -funroll-loops, but using | |
691 // -funroll-loops w/ this is bad - 74 cycles again. | |
692 // all this is on a 7450, tuning for the 7450 | |
693 #if 0 | |
682 for(i=0; i<h; i++) { | 694 for(i=0; i<h; i++) { |
683 pixelsv1 = vec_ld(0, (unsigned char*)pixels); | 695 pixelsv1 = vec_ld(0, (unsigned char*)pixels); |
684 pixelsv2 = vec_ld(16, (unsigned char*)pixels); | 696 pixelsv2 = vec_ld(16, (unsigned char*)pixels); |
685 vec_st(vec_perm(pixelsv1, pixelsv2, perm), | 697 vec_st(vec_perm(pixelsv1, pixelsv2, perm), |
686 0, (unsigned char*)block); | 698 0, (unsigned char*)block); |
687 pixels+=line_size; | 699 pixels+=line_size; |
688 block +=line_size; | 700 block +=line_size; |
689 } | 701 } |
690 | 702 #else |
691 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); | 703 for(i=0; i<h; i+=4) { |
704 pixelsv1 = vec_ld(0, (unsigned char*)pixels); | |
705 pixelsv2 = vec_ld(16, (unsigned char*)pixels); | |
706 pixelsv1B = vec_ld(line_size, (unsigned char*)pixels); | |
707 pixelsv2B = vec_ld(16 + line_size, (unsigned char*)pixels); | |
708 pixelsv1C = vec_ld(line_size_2, (unsigned char*)pixels); | |
709 pixelsv2C = vec_ld(16 + line_size_2, (unsigned char*)pixels); | |
710 pixelsv1D = vec_ld(line_size_3, (unsigned char*)pixels); | |
711 pixelsv2D = vec_ld(16 + line_size_3, (unsigned char*)pixels); | |
712 vec_st(vec_perm(pixelsv1, pixelsv2, perm), | |
713 0, (unsigned char*)block); | |
714 vec_st(vec_perm(pixelsv1B, pixelsv2B, perm), | |
715 line_size, (unsigned char*)block); | |
716 vec_st(vec_perm(pixelsv1C, pixelsv2C, perm), | |
717 line_size_2, (unsigned char*)block); | |
718 vec_st(vec_perm(pixelsv1D, pixelsv2D, perm), | |
719 line_size_3, (unsigned char*)block); | |
720 pixels+=line_size_4; | |
721 block +=line_size_4; | |
722 } | |
723 #endif | |
724 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_num, 1); | |
692 | 725 |
693 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 726 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
694 } | 727 } |
695 | 728 |
696 /* next one assumes that ((line_size % 16) == 0) */ | 729 /* next one assumes that ((line_size % 16) == 0) */ |
697 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | 730 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) |
698 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 731 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
699 { | 732 { |
700 POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); | 733 POWERPC_PERF_DECLARE(altivec_avg_pixels16_num, 1); |
701 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 734 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
702 int i; | 735 int i; |
703 | 736 |
704 POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); | 737 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); |
705 | 738 |
706 for(i=0; i<h; i++) { | 739 for(i=0; i<h; i++) { |
707 op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); | 740 op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); |
708 op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); | 741 op_avg(*((uint32_t*)(block+4)),(((const struct unaligned_32 *)(pixels+4))->l)); |
709 op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); | 742 op_avg(*((uint32_t*)(block+8)),(((const struct unaligned_32 *)(pixels+8))->l)); |
710 op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); | 743 op_avg(*((uint32_t*)(block+12)),(((const struct unaligned_32 *)(pixels+12))->l)); |
711 pixels+=line_size; | 744 pixels+=line_size; |
712 block +=line_size; | 745 block +=line_size; |
713 } | 746 } |
714 | 747 |
715 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); | 748 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); |
716 | 749 |
717 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 750 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
718 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | 751 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; |
719 register vector unsigned char perm = vec_lvsl(0, pixels); | 752 register vector unsigned char perm = vec_lvsl(0, pixels); |
720 int i; | 753 int i; |
721 | 754 |
722 POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); | 755 POWERPC_PERF_START_COUNT(altivec_avg_pixels16_num, 1); |
723 | 756 |
724 for(i=0; i<h; i++) { | 757 for(i=0; i<h; i++) { |
725 pixelsv1 = vec_ld(0, (unsigned char*)pixels); | 758 pixelsv1 = vec_ld(0, (unsigned char*)pixels); |
726 pixelsv2 = vec_ld(16, (unsigned char*)pixels); | 759 pixelsv2 = vec_ld(16, (unsigned char*)pixels); |
727 blockv = vec_ld(0, block); | 760 blockv = vec_ld(0, block); |
730 vec_st(blockv, 0, (unsigned char*)block); | 763 vec_st(blockv, 0, (unsigned char*)block); |
731 pixels+=line_size; | 764 pixels+=line_size; |
732 block +=line_size; | 765 block +=line_size; |
733 } | 766 } |
734 | 767 |
735 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); | 768 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels16_num, 1); |
736 | 769 |
737 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 770 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
738 } | 771 } |
739 | 772 |
740 /* next one assumes that ((line_size % 8) == 0) */ | 773 /* next one assumes that ((line_size % 8) == 0) */ |
741 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | 774 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) |
742 { | 775 { |
743 POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); | 776 POWERPC_PERF_DECLARE(altivec_avg_pixels8_num, 1); |
744 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 777 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
745 int i; | 778 int i; |
746 POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); | 779 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); |
747 for (i = 0; i < h; i++) { | 780 for (i = 0; i < h; i++) { |
748 *((uint32_t *) (block)) = | 781 *((uint32_t *) (block)) = |
749 (((*((uint32_t *) (block))) | | 782 (((*((uint32_t *) (block))) | |
750 ((((const struct unaligned_32 *) (pixels))->l))) - | 783 ((((const struct unaligned_32 *) (pixels))->l))) - |
751 ((((*((uint32_t *) (block))) ^ | 784 ((((*((uint32_t *) (block))) ^ |
759 4))-> | 792 4))-> |
760 l))) & 0xFEFEFEFEUL) >> 1)); | 793 l))) & 0xFEFEFEFEUL) >> 1)); |
761 pixels += line_size; | 794 pixels += line_size; |
762 block += line_size; | 795 block += line_size; |
763 } | 796 } |
764 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); | 797 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); |
765 | 798 |
766 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 799 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
767 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; | 800 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; |
768 int i; | 801 int i; |
769 | 802 |
770 POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); | 803 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1); |
771 | 804 |
772 for (i = 0; i < h; i++) { | 805 for (i = 0; i < h; i++) { |
773 /* | 806 /* |
774 block is 8 bytes-aligned, so we're either in the | 807 block is 8 bytes-aligned, so we're either in the |
775 left block (16 bytes-aligned) or in the right block (not) | 808 left block (16 bytes-aligned) or in the right block (not) |
796 | 829 |
797 pixels += line_size; | 830 pixels += line_size; |
798 block += line_size; | 831 block += line_size; |
799 } | 832 } |
800 | 833 |
801 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); | 834 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1); |
802 | 835 |
803 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 836 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
804 } | 837 } |
805 | 838 |
806 /* next one assumes that ((line_size % 8) == 0) */ | 839 /* next one assumes that ((line_size % 8) == 0) */ |
807 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 840 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
808 { | 841 { |
809 POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); | 842 POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1); |
810 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 843 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
811 int j; | 844 int j; |
812 POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); | 845 POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); |
813 for (j = 0; j < 2; j++) { | 846 for (j = 0; j < 2; j++) { |
814 int i; | 847 int i; |
815 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | 848 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); |
816 const uint32_t b = | 849 const uint32_t b = |
817 (((const struct unaligned_32 *) (pixels + 1))->l); | 850 (((const struct unaligned_32 *) (pixels + 1))->l); |
840 block += line_size; | 873 block += line_size; |
841 } pixels += 4 - line_size * (h + 1); | 874 } pixels += 4 - line_size * (h + 1); |
842 block += 4 - line_size * h; | 875 block += 4 - line_size * h; |
843 } | 876 } |
844 | 877 |
845 POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | 878 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); |
846 | 879 |
847 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 880 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
848 register int i; | 881 register int i; |
849 register vector unsigned char | 882 register vector unsigned char |
850 pixelsv1, pixelsv2, | 883 pixelsv1, pixelsv2, |
871 pixelsv2 = vec_mergeh(vczero, pixelsv2); | 904 pixelsv2 = vec_mergeh(vczero, pixelsv2); |
872 pixelssum1 = vec_add((vector unsigned short)pixelsv1, | 905 pixelssum1 = vec_add((vector unsigned short)pixelsv1, |
873 (vector unsigned short)pixelsv2); | 906 (vector unsigned short)pixelsv2); |
874 pixelssum1 = vec_add(pixelssum1, vctwo); | 907 pixelssum1 = vec_add(pixelssum1, vctwo); |
875 | 908 |
876 POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); | 909 POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); |
877 for (i = 0; i < h ; i++) { | 910 for (i = 0; i < h ; i++) { |
878 int rightside = ((unsigned long)block & 0x0000000F); | 911 int rightside = ((unsigned long)block & 0x0000000F); |
879 blockv = vec_ld(0, block); | 912 blockv = vec_ld(0, block); |
880 | 913 |
881 temp1 = vec_ld(line_size, pixels); | 914 temp1 = vec_ld(line_size, pixels); |
912 | 945 |
913 block += line_size; | 946 block += line_size; |
914 pixels += line_size; | 947 pixels += line_size; |
915 } | 948 } |
916 | 949 |
917 POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); | 950 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); |
918 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 951 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
919 } | 952 } |
920 | 953 |
921 /* next one assumes that ((line_size % 8) == 0) */ | 954 /* next one assumes that ((line_size % 8) == 0) */ |
922 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | 955 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
923 { | 956 { |
924 POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); | 957 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); |
925 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 958 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
926 int j; | 959 int j; |
927 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | 960 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); |
928 for (j = 0; j < 2; j++) { | 961 for (j = 0; j < 2; j++) { |
929 int i; | 962 int i; |
930 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | 963 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); |
931 const uint32_t b = | 964 const uint32_t b = |
932 (((const struct unaligned_32 *) (pixels + 1))->l); | 965 (((const struct unaligned_32 *) (pixels + 1))->l); |
955 block += line_size; | 988 block += line_size; |
956 } pixels += 4 - line_size * (h + 1); | 989 } pixels += 4 - line_size * (h + 1); |
957 block += 4 - line_size * h; | 990 block += 4 - line_size * h; |
958 } | 991 } |
959 | 992 |
960 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | 993 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); |
961 | 994 |
962 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 995 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
963 register int i; | 996 register int i; |
964 register vector unsigned char | 997 register vector unsigned char |
965 pixelsv1, pixelsv2, | 998 pixelsv1, pixelsv2, |
987 pixelsv2 = vec_mergeh(vczero, pixelsv2); | 1020 pixelsv2 = vec_mergeh(vczero, pixelsv2); |
988 pixelssum1 = vec_add((vector unsigned short)pixelsv1, | 1021 pixelssum1 = vec_add((vector unsigned short)pixelsv1, |
989 (vector unsigned short)pixelsv2); | 1022 (vector unsigned short)pixelsv2); |
990 pixelssum1 = vec_add(pixelssum1, vcone); | 1023 pixelssum1 = vec_add(pixelssum1, vcone); |
991 | 1024 |
992 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | 1025 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); |
993 for (i = 0; i < h ; i++) { | 1026 for (i = 0; i < h ; i++) { |
994 int rightside = ((unsigned long)block & 0x0000000F); | 1027 int rightside = ((unsigned long)block & 0x0000000F); |
995 blockv = vec_ld(0, block); | 1028 blockv = vec_ld(0, block); |
996 | 1029 |
997 temp1 = vec_ld(line_size, pixels); | 1030 temp1 = vec_ld(line_size, pixels); |
1028 | 1061 |
1029 block += line_size; | 1062 block += line_size; |
1030 pixels += line_size; | 1063 pixels += line_size; |
1031 } | 1064 } |
1032 | 1065 |
1033 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); | 1066 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); |
1034 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1067 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1035 } | 1068 } |
1036 | 1069 |
1037 /* next one assumes that ((line_size % 16) == 0) */ | 1070 /* next one assumes that ((line_size % 16) == 0) */ |
1038 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | 1071 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) |
1039 { | 1072 { |
1040 POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1); | 1073 POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1); |
1041 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 1074 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
1042 int j; | 1075 int j; |
1043 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); | 1076 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); |
1044 for (j = 0; j < 4; j++) { | 1077 for (j = 0; j < 4; j++) { |
1045 int i; | 1078 int i; |
1046 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | 1079 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); |
1047 const uint32_t b = | 1080 const uint32_t b = |
1048 (((const struct unaligned_32 *) (pixels + 1))->l); | 1081 (((const struct unaligned_32 *) (pixels + 1))->l); |
1071 block += line_size; | 1104 block += line_size; |
1072 } pixels += 4 - line_size * (h + 1); | 1105 } pixels += 4 - line_size * (h + 1); |
1073 block += 4 - line_size * h; | 1106 block += 4 - line_size * h; |
1074 } | 1107 } |
1075 | 1108 |
1076 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | 1109 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); |
1077 | 1110 |
1078 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1111 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1079 register int i; | 1112 register int i; |
1080 register vector unsigned char | 1113 register vector unsigned char |
1081 pixelsv1, pixelsv2, pixelsv3, pixelsv4; | 1114 pixelsv1, pixelsv2, pixelsv3, pixelsv4; |
1085 pixelssum1, pixelssum2, temp3, | 1118 pixelssum1, pixelssum2, temp3, |
1086 pixelssum3, pixelssum4, temp4; | 1119 pixelssum3, pixelssum4, temp4; |
1087 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | 1120 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); |
1088 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | 1121 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); |
1089 | 1122 |
1090 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); | 1123 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1); |
1091 | 1124 |
1092 temp1 = vec_ld(0, pixels); | 1125 temp1 = vec_ld(0, pixels); |
1093 temp2 = vec_ld(16, pixels); | 1126 temp2 = vec_ld(16, pixels); |
1094 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 1127 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
1095 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 1128 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) |
1149 | 1182 |
1150 block += line_size; | 1183 block += line_size; |
1151 pixels += line_size; | 1184 pixels += line_size; |
1152 } | 1185 } |
1153 | 1186 |
1154 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); | 1187 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); |
1155 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1188 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1156 } | 1189 } |
1157 | 1190 |
1158 /* next one assumes that ((line_size % 16) == 0) */ | 1191 /* next one assumes that ((line_size % 16) == 0) */ |
1159 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) | 1192 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) |
1160 { | 1193 { |
1161 POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1194 POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1162 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 1195 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
1163 int j; | 1196 int j; |
1164 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1197 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1165 for (j = 0; j < 4; j++) { | 1198 for (j = 0; j < 4; j++) { |
1166 int i; | 1199 int i; |
1167 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); | 1200 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); |
1168 const uint32_t b = | 1201 const uint32_t b = |
1169 (((const struct unaligned_32 *) (pixels + 1))->l); | 1202 (((const struct unaligned_32 *) (pixels + 1))->l); |
1192 block += line_size; | 1225 block += line_size; |
1193 } pixels += 4 - line_size * (h + 1); | 1226 } pixels += 4 - line_size * (h + 1); |
1194 block += 4 - line_size * h; | 1227 block += 4 - line_size * h; |
1195 } | 1228 } |
1196 | 1229 |
1197 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1230 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1198 | 1231 |
1199 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1232 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1200 register int i; | 1233 register int i; |
1201 register vector unsigned char | 1234 register vector unsigned char |
1202 pixelsv1, pixelsv2, pixelsv3, pixelsv4; | 1235 pixelsv1, pixelsv2, pixelsv3, pixelsv4; |
1207 pixelssum3, pixelssum4, temp4; | 1240 pixelssum3, pixelssum4, temp4; |
1208 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); | 1241 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); |
1209 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); | 1242 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); |
1210 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); | 1243 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); |
1211 | 1244 |
1212 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1245 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1213 | 1246 |
1214 temp1 = vec_ld(0, pixels); | 1247 temp1 = vec_ld(0, pixels); |
1215 temp2 = vec_ld(16, pixels); | 1248 temp2 = vec_ld(16, pixels); |
1216 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); | 1249 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); |
1217 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) | 1250 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) |
1271 | 1304 |
1272 block += line_size; | 1305 block += line_size; |
1273 pixels += line_size; | 1306 pixels += line_size; |
1274 } | 1307 } |
1275 | 1308 |
1276 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); | 1309 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); |
1277 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 1310 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1278 } | 1311 } |
1279 | 1312 |
1280 int has_altivec(void) | 1313 int has_altivec(void) |
1281 { | 1314 { |