comparison ppc/dsputil_altivec.c @ 1024:9cc1031e1864 libavcodec

More AltiVec MC functions; patch by Romain Dolbeau <dolbeau at irisa dot fr>
author michaelni
date Mon, 20 Jan 2003 22:50:14 +0000
parents 35cf2f4a0f8c
children b4172ff70d27
comparing 1023:e61be5796027 with 1024:9cc1031e1864
1 /* 1 /*
2 * Copyright (c) 2002 Brian Foley 2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley 3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
4 * 5 *
5 * This library is free software; you can redistribute it and/or 6 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public 7 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either 8 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version. 9 * version 2 of the License, or (at your option) any later version.
626 dst[i] = src[i]; 627 dst[i] = src[i];
627 } 628 }
628 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 629 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
629 } 630 }
630 631
631 extern UINT8 cropTbl[]; 632 /* next one assumes that ((line_size % 16) == 0) */
632 void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels,
633 int line_size)
634 {
635 POWERPC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1);
636 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
637 int i;
638 UINT8 *cm = cropTbl + MAX_NEG_CROP;
639
640 POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
641
642 /* read the pixels */
643 for(i=0;i<8;i++) {
644 pixels[0] = cm[block[0]];
645 pixels[1] = cm[block[1]];
646 pixels[2] = cm[block[2]];
647 pixels[3] = cm[block[3]];
648 pixels[4] = cm[block[4]];
649 pixels[5] = cm[block[5]];
650 pixels[6] = cm[block[6]];
651 pixels[7] = cm[block[7]];
652
653 pixels += line_size;
654 block += 8;
655 }
656
657 POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
658
659 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
660 register const vector short vczero = (const vector short)(0);
661 register vector short
662 blockv0, blockv1, blockv2, blockv3,
663 blockv4, blockv5, blockv6, blockv7;
664 register vector unsigned char
665 pixelsv0, pixelsv1, pixelsv2, pixelsv3, pixelsv4,
666 pixelsv0old, pixelsv4old;
667
668 POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
669
670 blockv0 = vec_ld(0, block);
671 blockv1 = vec_ld(16, block);
672 blockv2 = vec_ld(32, block);
673 blockv3 = vec_ld(48, block);
674 blockv4 = vec_ld(64, block);
675 blockv5 = vec_ld(80, block);
676 blockv6 = vec_ld(96, block);
677 blockv7 = vec_ld(112, block);
678 if (((unsigned long)pixels) & 0x0000000F)
679 {
680 pixelsv0old = vec_ld(-8, pixels);
681 pixelsv4old = vec_ld(56, pixels);
682 pixelsv0 = vec_packsu(vczero, blockv0);
683 pixelsv1 = vec_packsu(blockv1, blockv2);
684 pixelsv2 = vec_packsu(blockv3, blockv4);
685 pixelsv3 = vec_packsu(blockv5, blockv6);
686 pixelsv4 = vec_packsu(blockv5, vczero);
687 pixelsv0 = vec_perm(pixelsv0old, pixelsv0, vcprm(0, 1, s2, s3));
688 pixelsv4 = vec_perm(pixelsv4, pixelsv4old, vcprm(0, 1, s2, s3));
689 vec_st(pixelsv0, -8, pixels);
690 vec_st(pixelsv1, 8, pixels);
691 vec_st(pixelsv2, 24, pixels);
692 vec_st(pixelsv3, 40, pixels);
693 vec_st(pixelsv4, 56, pixels);
694 }
695 else
696 {
697 pixelsv0 = vec_packsu(blockv0, blockv1);
698 pixelsv1 = vec_packsu(blockv2, blockv3);
699 pixelsv2 = vec_packsu(blockv4, blockv5);
700 pixelsv3 = vec_packsu(blockv6, blockv7);
701 vec_st(pixelsv0, 0, pixels);
702 vec_st(pixelsv1, 16, pixels);
703 vec_st(pixelsv2, 32, pixels);
704 vec_st(pixelsv3, 48, pixels);
705 }
706
707 POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
708 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
709 }
710
711 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 633 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
712 { 634 {
713 POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); 635 POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
714 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 636 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
715 int i; 637 int i;
727 649
728 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); 650 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
729 651
730 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 652 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
731 register vector unsigned char pixelsv1, pixelsv2; 653 register vector unsigned char pixelsv1, pixelsv2;
654 register vector unsigned char perm = vec_lvsl(0, pixels);
732 int i; 655 int i;
733 656
734 POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); 657 POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
735 658
736 for(i=0; i<h; i++) { 659 for(i=0; i<h; i++) {
737 pixelsv1 = vec_ld(0, (unsigned char*)pixels); 660 pixelsv1 = vec_ld(0, (unsigned char*)pixels);
738 pixelsv2 = vec_ld(16, (unsigned char*)pixels); 661 pixelsv2 = vec_ld(16, (unsigned char*)pixels);
739 vec_st(vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)), 662 vec_st(vec_perm(pixelsv1, pixelsv2, perm),
740 0, (unsigned char*)block); 663 0, (unsigned char*)block);
741 pixels+=line_size; 664 pixels+=line_size;
742 block +=line_size; 665 block +=line_size;
743 } 666 }
744 667
745 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); 668 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
746 669
747 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 670 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
748 } 671 }
749 672
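The only change to put_pixels16_altivec is hoisting the vec_lvsl permute vector out of the loop; since the function now assumes ((line_size % 16) == 0), the alignment of pixels, and therefore the lvsl result, is identical on every row. A minimal scalar model of the vec_ld/vec_lvsl/vec_perm unaligned-load idiom used here (illustration only; load16_unaligned_model is a hypothetical helper, not part of the patch):

/* Scalar model of an unaligned 16-byte load done AltiVec-style:
 * two aligned loads plus a byte select starting at the misalignment offset. */
#include <stdint.h>
#include <string.h>

void load16_unaligned_model(uint8_t dst[16], const uint8_t *pixels)
{
    const uint8_t *base = (const uint8_t *)((uintptr_t)pixels & ~(uintptr_t)15);
    unsigned off = (uintptr_t)pixels & 15;   /* the offset that vec_lvsl(0, pixels) encodes */
    uint8_t lo[16], hi[16];
    memcpy(lo, base, 16);                    /* vec_ld(0, pixels): first aligned block  */
    memcpy(hi, base + 16, 16);               /* vec_ld(16, pixels): second aligned block */
    for (int i = 0; i < 16; i++)             /* vec_perm(lo, hi, perm): pick 16 bytes   */
        dst[i] = (off + i < 16) ? lo[off + i] : hi[off + i - 16];
}

Like vec_ld, this model always touches the second aligned block even when off is 0.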
673 /* next one assumes that ((line_size % 16) == 0) */
750 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) 674 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
751 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 675 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
752 { 676 {
753 POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); 677 POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
754 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 678 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
767 691
768 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); 692 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
769 693
770 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 694 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
771 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; 695 register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
696 register vector unsigned char perm = vec_lvsl(0, pixels);
772 int i; 697 int i;
773 698
774 POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); 699 POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
775 700
776 for(i=0; i<h; i++) { 701 for(i=0; i<h; i++) {
777 pixelsv1 = vec_ld(0, (unsigned char*)pixels); 702 pixelsv1 = vec_ld(0, (unsigned char*)pixels);
778 pixelsv2 = vec_ld(16, (unsigned char*)pixels); 703 pixelsv2 = vec_ld(16, (unsigned char*)pixels);
779 blockv = vec_ld(0, block); 704 blockv = vec_ld(0, block);
780 pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); 705 pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
781 blockv = vec_avg(blockv,pixelsv); 706 blockv = vec_avg(blockv,pixelsv);
782 vec_st(blockv, 0, (unsigned char*)block); 707 vec_st(blockv, 0, (unsigned char*)block);
783 pixels+=line_size; 708 pixels+=line_size;
784 block +=line_size; 709 block +=line_size;
785 } 710 }
787 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); 712 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
788 713
789 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 714 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
790 } 715 }
791 716
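For reference, the scalar op_avg fallback and vec_avg compute the same thing: a per-byte average rounded up, done on four packed bytes at once, with the 0xFEFEFEFE mask keeping the shifted bits from crossing byte boundaries. A small self-check of that identity (illustrative sketch only; op_avg_model simply mirrors the macro above):

/* Verify that the op_avg bit trick equals (a + b + 1) >> 1 per byte. */
#include <assert.h>
#include <stdint.h>

static uint32_t op_avg_model(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & 0xFEFEFEFEUL) >> 1);
}

int main(void)
{
    for (unsigned a = 0; a < 256; a++)
        for (unsigned b = 0; b < 256; b++) {
            /* replicate the byte into all four lanes and check one lane */
            uint32_t packed = op_avg_model(a * 0x01010101u, b * 0x01010101u);
            assert((packed & 0xFF) == ((a + b + 1) >> 1));
        }
    return 0;
}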
792 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, 717 /* next one assumes that ((line_size % 8) == 0) */
793 int line_size, int h) 718 void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
794 { 719 {
795 POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); 720 POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1);
796 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 721 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
797 int i; 722 int i;
798 POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); 723 POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
853 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); 778 POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
854 779
855 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 780 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
856 } 781 }
857 782
783 /* next one assumes that ((line_size % 8) == 0) */
858 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) 784 void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
859 { 785 {
860 POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); 786 POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1);
861 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 787 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
862 int j; 788 int j;
967 893
968 POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); 894 POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
969 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 895 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
970 } 896 }
971 897
898 /* next one assumes that ((line_size % 8) == 0) */
899 void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
900 {
901 POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
902 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
903 int j;
904 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
905 for (j = 0; j < 2; j++) {
906 int i;
907 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
908 const uint32_t b =
909 (((const struct unaligned_32 *) (pixels + 1))->l);
910 uint32_t l0 =
911 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
912 uint32_t h0 =
913 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
914 uint32_t l1, h1;
915 pixels += line_size;
916 for (i = 0; i < h; i += 2) {
917 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
918 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
919 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
920 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
921 *((uint32_t *) block) =
922 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
923 pixels += line_size;
924 block += line_size;
925 a = (((const struct unaligned_32 *) (pixels))->l);
926 b = (((const struct unaligned_32 *) (pixels + 1))->l);
927 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
928 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
929 *((uint32_t *) block) =
930 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
931 pixels += line_size;
932 block += line_size;
933 } pixels += 4 - line_size * (h + 1);
934 block += 4 - line_size * h;
935 }
936
937 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
938
939 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
940 register int i;
941 register vector unsigned char
942 pixelsv1, pixelsv2,
943 pixelsavg;
944 register vector unsigned char
945 blockv, temp1, temp2;
946 register vector unsigned short
947 pixelssum1, pixelssum2, temp3;
948 register const vector unsigned char vczero = (const vector unsigned char)(0);
949 register const vector unsigned short vcone = (const vector unsigned short)(1);
950 register const vector unsigned short vctwo = (const vector unsigned short)(2);
951
952 temp1 = vec_ld(0, pixels);
953 temp2 = vec_ld(16, pixels);
954 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
955 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
956 {
957 pixelsv2 = temp2;
958 }
959 else
960 {
961 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
962 }
963 pixelsv1 = vec_mergeh(vczero, pixelsv1);
964 pixelsv2 = vec_mergeh(vczero, pixelsv2);
965 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
966 (vector unsigned short)pixelsv2);
967 pixelssum1 = vec_add(pixelssum1, vcone);
968
969 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
970 for (i = 0; i < h ; i++) {
971 int rightside = ((unsigned long)block & 0x0000000F);
972 blockv = vec_ld(0, block);
973
974 temp1 = vec_ld(line_size, pixels);
975 temp2 = vec_ld(line_size + 16, pixels);
976 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
977 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
978 {
979 pixelsv2 = temp2;
980 }
981 else
982 {
983 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
984 }
985
986 pixelsv1 = vec_mergeh(vczero, pixelsv1);
987 pixelsv2 = vec_mergeh(vczero, pixelsv2);
988 pixelssum2 = vec_add((vector unsigned short)pixelsv1,
989 (vector unsigned short)pixelsv2);
990 temp3 = vec_add(pixelssum1, pixelssum2);
991 temp3 = vec_sra(temp3, vctwo);
992 pixelssum1 = vec_add(pixelssum2, vcone);
993 pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
994
995 if (rightside)
996 {
997 blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
998 }
999 else
1000 {
1001 blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
1002 }
1003
1004 vec_st(blockv, 0, block);
1005
1006 block += line_size;
1007 pixels += line_size;
1008 }
1009
1010 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
1011 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1012 }
1013
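The reference C above computes the half-pel (x+1/2, y+1/2) interpolation (a+b+c+d+1)>>2 on four packed pixels at once by splitting each byte into its low 2 bits (l0/l1) and pre-shifted high 6 bits (h0/h1); the rounded put_pixels8_xy2 variant uses 0x02020202 (vctwo in the vector path) where this no-rounding version uses 0x01010101 (vcone). A per-byte model of that split (illustration only; bilinear_no_rnd_model is a hypothetical helper, not part of the patch):

/* One byte of the packed no-rounding 2x2 average, checked against the
 * straightforward formula. */
#include <assert.h>
#include <stdint.h>

static uint8_t bilinear_no_rnd_model(uint8_t a, uint8_t b, uint8_t c, uint8_t d)
{
    uint32_t l0 = (a & 0x03) + (b & 0x03) + 0x01;        /* low 2 bits + rounding term   */
    uint32_t h0 = ((a & 0xFC) >> 2) + ((b & 0xFC) >> 2); /* high 6 bits, pre-shifted     */
    uint32_t l1 = (c & 0x03) + (d & 0x03);
    uint32_t h1 = ((c & 0xFC) >> 2) + ((d & 0xFC) >> 2);
    /* the 0x0F mask is what stops carries between bytes in the packed version */
    return (uint8_t)(h0 + h1 + (((l0 + l1) >> 2) & 0x0F));
}

int main(void)
{
    for (unsigned a = 0; a < 256; a += 5)
        for (unsigned b = 0; b < 256; b += 7)
            for (unsigned c = 0; c < 256; c += 11)
                for (unsigned d = 0; d < 256; d += 13)
                    assert(bilinear_no_rnd_model(a, b, c, d) == ((a + b + c + d + 1) >> 2));
    return 0;
}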
1014 /* next one assumes that ((line_size % 16) == 0) */
1015 void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
1016 {
1017 POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1);
1018 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1019 int j;
1020 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1021 for (j = 0; j < 4; j++) {
1022 int i;
1023 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1024 const uint32_t b =
1025 (((const struct unaligned_32 *) (pixels + 1))->l);
1026 uint32_t l0 =
1027 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1028 uint32_t h0 =
1029 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1030 uint32_t l1, h1;
1031 pixels += line_size;
1032 for (i = 0; i < h; i += 2) {
1033 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1034 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1035 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1036 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1037 *((uint32_t *) block) =
1038 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1039 pixels += line_size;
1040 block += line_size;
1041 a = (((const struct unaligned_32 *) (pixels))->l);
1042 b = (((const struct unaligned_32 *) (pixels + 1))->l);
1043 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1044 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1045 *((uint32_t *) block) =
1046 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1047 pixels += line_size;
1048 block += line_size;
1049 } pixels += 4 - line_size * (h + 1);
1050 block += 4 - line_size * h;
1051 }
1052
1053 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
1054
1055 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1056 register int i;
1057 register vector unsigned char
1058 pixelsv1, pixelsv2, pixelsv3, pixelsv4;
1059 register vector unsigned char
1060 blockv, temp1, temp2;
1061 register vector unsigned short
1062 pixelssum1, pixelssum2, temp3,
1063 pixelssum3, pixelssum4, temp4;
1064 register const vector unsigned char vczero = (const vector unsigned char)(0);
1065 register const vector unsigned short vctwo = (const vector unsigned short)(2);
1066
1067 temp1 = vec_ld(0, pixels);
1068 temp2 = vec_ld(16, pixels);
1069 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1070 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1071 {
1072 pixelsv2 = temp2;
1073 }
1074 else
1075 {
1076 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
1077 }
1078 pixelsv3 = vec_mergel(vczero, pixelsv1);
1079 pixelsv4 = vec_mergel(vczero, pixelsv2);
1080 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1081 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1082 pixelssum3 = vec_add((vector unsigned short)pixelsv3,
1083 (vector unsigned short)pixelsv4);
1084 pixelssum3 = vec_add(pixelssum3, vctwo);
1085 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1086 (vector unsigned short)pixelsv2);
1087 pixelssum1 = vec_add(pixelssum1, vctwo);
1088
1089 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1090 for (i = 0; i < h ; i++) {
1091 blockv = vec_ld(0, block);
1092
1093 temp1 = vec_ld(line_size, pixels);
1094 temp2 = vec_ld(line_size + 16, pixels);
1095 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
1096 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
1097 {
1098 pixelsv2 = temp2;
1099 }
1100 else
1101 {
1102 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
1103 }
1104
1105 pixelsv3 = vec_mergel(vczero, pixelsv1);
1106 pixelsv4 = vec_mergel(vczero, pixelsv2);
1107 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1108 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1109
1110 pixelssum4 = vec_add((vector unsigned short)pixelsv3,
1111 (vector unsigned short)pixelsv4);
1112 pixelssum2 = vec_add((vector unsigned short)pixelsv1,
1113 (vector unsigned short)pixelsv2);
1114 temp4 = vec_add(pixelssum3, pixelssum4);
1115 temp4 = vec_sra(temp4, vctwo);
1116 temp3 = vec_add(pixelssum1, pixelssum2);
1117 temp3 = vec_sra(temp3, vctwo);
1118
1119 pixelssum3 = vec_add(pixelssum4, vctwo);
1120 pixelssum1 = vec_add(pixelssum2, vctwo);
1121
1122 blockv = vec_packsu(temp3, temp4);
1123
1124 vec_st(blockv, 0, block);
1125
1126 block += line_size;
1127 pixels += line_size;
1128 }
1129
1130 POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
1131 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1132 }
1133
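In the 16-pixel-wide xy2 functions each source row is widened to two vectors of eight unsigned shorts by merging it with a zero vector, so the four-pixel sums cannot overflow a byte before the final pack. A scalar sketch of that widening step (hypothetical helper, assuming the big-endian AltiVec element order):

/* Model of vec_mergeh/vec_mergel against vczero: zero-extend 16 bytes
 * into two groups of eight 16-bit values. */
#include <stdint.h>

void widen_bytes_model(const uint8_t src[16], uint16_t lo8[8], uint16_t hi8[8])
{
    for (int i = 0; i < 8; i++) {
        lo8[i] = src[i];       /* vec_mergeh(vczero, v): bytes 0..7  as shorts */
        hi8[i] = src[i + 8];   /* vec_mergel(vczero, v): bytes 8..15 as shorts */
    }
}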
1134 /* next one assumes that ((line_size % 16) == 0) */
1135 void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h)
1136 {
1137 POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
1138 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
1139 int j;
1140 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1141 for (j = 0; j < 4; j++) {
1142 int i;
1143 const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1144 const uint32_t b =
1145 (((const struct unaligned_32 *) (pixels + 1))->l);
1146 uint32_t l0 =
1147 (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
1148 uint32_t h0 =
1149 ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1150 uint32_t l1, h1;
1151 pixels += line_size;
1152 for (i = 0; i < h; i += 2) {
1153 uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
1154 uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
1155 l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1156 h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1157 *((uint32_t *) block) =
1158 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1159 pixels += line_size;
1160 block += line_size;
1161 a = (((const struct unaligned_32 *) (pixels))->l);
1162 b = (((const struct unaligned_32 *) (pixels + 1))->l);
1163 l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL;
1164 h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1165 *((uint32_t *) block) =
1166 h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1167 pixels += line_size;
1168 block += line_size;
1169 } pixels += 4 - line_size * (h + 1);
1170 block += 4 - line_size * h;
1171 }
1172
1173 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1174
1175 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1176 register int i;
1177 register vector unsigned char
1178 pixelsv1, pixelsv2, pixelsv3, pixelsv4;
1179 register vector unsigned char
1180 blockv, temp1, temp2;
1181 register vector unsigned short
1182 pixelssum1, pixelssum2, temp3,
1183 pixelssum3, pixelssum4, temp4;
1184 register const vector unsigned char vczero = (const vector unsigned char)(0);
1185 register const vector unsigned short vcone = (const vector unsigned short)(1);
1186 register const vector unsigned short vctwo = (const vector unsigned short)(2);
1187
1188 temp1 = vec_ld(0, pixels);
1189 temp2 = vec_ld(16, pixels);
1190 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1191 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1192 {
1193 pixelsv2 = temp2;
1194 }
1195 else
1196 {
1197 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
1198 }
1199 pixelsv3 = vec_mergel(vczero, pixelsv1);
1200 pixelsv4 = vec_mergel(vczero, pixelsv2);
1201 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1202 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1203 pixelssum3 = vec_add((vector unsigned short)pixelsv3,
1204 (vector unsigned short)pixelsv4);
1205 pixelssum3 = vec_add(pixelssum3, vcone);
1206 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1207 (vector unsigned short)pixelsv2);
1208 pixelssum1 = vec_add(pixelssum1, vcone);
1209
1210 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1211 for (i = 0; i < h ; i++) {
1212 blockv = vec_ld(0, block);
1213
1214 temp1 = vec_ld(line_size, pixels);
1215 temp2 = vec_ld(line_size + 16, pixels);
1216 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
1217 if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
1218 {
1219 pixelsv2 = temp2;
1220 }
1221 else
1222 {
1223 pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
1224 }
1225
1226 pixelsv3 = vec_mergel(vczero, pixelsv1);
1227 pixelsv4 = vec_mergel(vczero, pixelsv2);
1228 pixelsv1 = vec_mergeh(vczero, pixelsv1);
1229 pixelsv2 = vec_mergeh(vczero, pixelsv2);
1230
1231 pixelssum4 = vec_add((vector unsigned short)pixelsv3,
1232 (vector unsigned short)pixelsv4);
1233 pixelssum2 = vec_add((vector unsigned short)pixelsv1,
1234 (vector unsigned short)pixelsv2);
1235 temp4 = vec_add(pixelssum3, pixelssum4);
1236 temp4 = vec_sra(temp4, vctwo);
1237 temp3 = vec_add(pixelssum1, pixelssum2);
1238 temp3 = vec_sra(temp3, vctwo);
1239
1240 pixelssum3 = vec_add(pixelssum4, vcone);
1241 pixelssum1 = vec_add(pixelssum2, vcone);
1242
1243 blockv = vec_packsu(temp3, temp4);
1244
1245 vec_st(blockv, 0, block);
1246
1247 block += line_size;
1248 pixels += line_size;
1249 }
1250
1251 POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1252 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1253 }
1254
972 int has_altivec(void) 1255 int has_altivec(void)
973 { 1256 {
974 #if CONFIG_DARWIN 1257 #if CONFIG_DARWIN
975 int sels[2] = {CTL_HW, HW_VECTORUNIT}; 1258 int sels[2] = {CTL_HW, HW_VECTORUNIT};
976 int has_vu = 0; 1259 int has_vu = 0;