Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 2967:ef2149182f1c libavcodec
COSMETICS: Remove all trailing whitespace.
author | diego |
---|---|
date | Sat, 17 Dec 2005 18:14:38 +0000 |
parents | 8aa244d7c274 |
children | bfabfdf9ce55 |
comparison
equal
deleted
inserted
replaced
2966:564788471dd4 | 2967:ef2149182f1c |
---|---|
600 | 600 |
601 static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | 601 static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ |
602 const int strength= ff_h263_loop_filter_strength[qscale]; | 602 const int strength= ff_h263_loop_filter_strength[qscale]; |
603 | 603 |
604 asm volatile( | 604 asm volatile( |
605 | 605 |
606 H263_LOOP_FILTER | 606 H263_LOOP_FILTER |
607 | 607 |
608 "movq %%mm3, %1 \n\t" | 608 "movq %%mm3, %1 \n\t" |
609 "movq %%mm4, %2 \n\t" | 609 "movq %%mm4, %2 \n\t" |
610 "movq %%mm5, %0 \n\t" | 610 "movq %%mm5, %0 \n\t" |
611 "movq %%mm6, %3 \n\t" | 611 "movq %%mm6, %3 \n\t" |
612 : "+m" (*(uint64_t*)(src - 2*stride)), | 612 : "+m" (*(uint64_t*)(src - 2*stride)), |
632 "punpckhdq %%mm0, %%mm0 \n\t" | 632 "punpckhdq %%mm0, %%mm0 \n\t" |
633 "movd %%mm0, %1 \n\t" | 633 "movd %%mm0, %1 \n\t" |
634 "movd %%mm1, %2 \n\t" | 634 "movd %%mm1, %2 \n\t" |
635 "punpckhdq %%mm1, %%mm1 \n\t" | 635 "punpckhdq %%mm1, %%mm1 \n\t" |
636 "movd %%mm1, %3 \n\t" | 636 "movd %%mm1, %3 \n\t" |
637 | 637 |
638 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), | 638 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), |
639 "=m" (*(uint32_t*)(dst + 1*dst_stride)), | 639 "=m" (*(uint32_t*)(dst + 1*dst_stride)), |
640 "=m" (*(uint32_t*)(dst + 2*dst_stride)), | 640 "=m" (*(uint32_t*)(dst + 2*dst_stride)), |
641 "=m" (*(uint32_t*)(dst + 3*dst_stride)) | 641 "=m" (*(uint32_t*)(dst + 3*dst_stride)) |
642 : "m" (*(uint32_t*)(src + 0*src_stride)), | 642 : "m" (*(uint32_t*)(src + 0*src_stride)), |
648 | 648 |
649 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | 649 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ |
650 const int strength= ff_h263_loop_filter_strength[qscale]; | 650 const int strength= ff_h263_loop_filter_strength[qscale]; |
651 uint64_t temp[4] __attribute__ ((aligned(8))); | 651 uint64_t temp[4] __attribute__ ((aligned(8))); |
652 uint8_t *btemp= (uint8_t*)temp; | 652 uint8_t *btemp= (uint8_t*)temp; |
653 | 653 |
654 src -= 2; | 654 src -= 2; |
655 | 655 |
656 transpose4x4(btemp , src , 8, stride); | 656 transpose4x4(btemp , src , 8, stride); |
657 transpose4x4(btemp+4, src + 4*stride, 8, stride); | 657 transpose4x4(btemp+4, src + 4*stride, 8, stride); |
658 asm volatile( | 658 asm volatile( |
659 H263_LOOP_FILTER // 5 3 4 6 | 659 H263_LOOP_FILTER // 5 3 4 6 |
660 | 660 |
661 : "+m" (temp[0]), | 661 : "+m" (temp[0]), |
662 "+m" (temp[1]), | 662 "+m" (temp[1]), |
663 "+m" (temp[2]), | 663 "+m" (temp[2]), |
664 "+m" (temp[3]) | 664 "+m" (temp[3]) |
665 : "g" (2*strength), "m"(ff_pb_FC) | 665 : "g" (2*strength), "m"(ff_pb_FC) |
794 | 794 |
795 "movq %%mm7,%%mm1\n" | 795 "movq %%mm7,%%mm1\n" |
796 "psrlq $32, %%mm7\n" /* shift hi dword to lo */ | 796 "psrlq $32, %%mm7\n" /* shift hi dword to lo */ |
797 "paddd %%mm7,%%mm1\n" | 797 "paddd %%mm7,%%mm1\n" |
798 "movd %%mm1,%2\n" | 798 "movd %%mm1,%2\n" |
799 : "+r" (pix1), "+r" (pix2), "=r"(tmp) | 799 : "+r" (pix1), "+r" (pix2), "=r"(tmp) |
800 : "r" ((long)line_size) , "m" (h) | 800 : "r" ((long)line_size) , "m" (h) |
801 : "%ecx"); | 801 : "%ecx"); |
802 return tmp; | 802 return tmp; |
803 } | 803 } |
804 | 804 |
854 | 854 |
855 "movq %%mm7,%%mm1\n" | 855 "movq %%mm7,%%mm1\n" |
856 "psrlq $32, %%mm7\n" /* shift hi dword to lo */ | 856 "psrlq $32, %%mm7\n" /* shift hi dword to lo */ |
857 "paddd %%mm7,%%mm1\n" | 857 "paddd %%mm7,%%mm1\n" |
858 "movd %%mm1,%2\n" | 858 "movd %%mm1,%2\n" |
859 : "+r" (pix1), "+r" (pix2), "=r"(tmp) | 859 : "+r" (pix1), "+r" (pix2), "=r"(tmp) |
860 : "r" ((long)line_size) , "m" (h) | 860 : "r" ((long)line_size) , "m" (h) |
861 : "%ecx"); | 861 : "%ecx"); |
862 return tmp; | 862 return tmp; |
863 } | 863 } |
864 | 864 |
917 "paddd %%xmm1,%%xmm7\n" | 917 "paddd %%xmm1,%%xmm7\n" |
918 "movdqa %%xmm7,%%xmm1\n" | 918 "movdqa %%xmm7,%%xmm1\n" |
919 "psrldq $4, %%xmm7\n" /* shift hi dword to lo */ | 919 "psrldq $4, %%xmm7\n" /* shift hi dword to lo */ |
920 "paddd %%xmm1,%%xmm7\n" | 920 "paddd %%xmm1,%%xmm7\n" |
921 "movd %%xmm7,%3\n" | 921 "movd %%xmm7,%3\n" |
922 : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp) | 922 : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp) |
923 : "r" ((long)line_size)); | 923 : "r" ((long)line_size)); |
924 return tmp; | 924 return tmp; |
925 } | 925 } |
926 | 926 |
927 static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { | 927 static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { |
928 int tmp; | 928 int tmp; |
929 asm volatile ( | 929 asm volatile ( |
930 "movl %3,%%ecx\n" | 930 "movl %3,%%ecx\n" |
931 "pxor %%mm7,%%mm7\n" | 931 "pxor %%mm7,%%mm7\n" |
932 "pxor %%mm6,%%mm6\n" | 932 "pxor %%mm6,%%mm6\n" |
933 | 933 |
934 "movq (%0),%%mm0\n" | 934 "movq (%0),%%mm0\n" |
935 "movq %%mm0, %%mm1\n" | 935 "movq %%mm0, %%mm1\n" |
936 "psllq $8, %%mm0\n" | 936 "psllq $8, %%mm0\n" |
937 "psrlq $8, %%mm1\n" | 937 "psrlq $8, %%mm1\n" |
938 "psrlq $8, %%mm0\n" | 938 "psrlq $8, %%mm0\n" |
942 "punpcklbw %%mm7,%%mm1\n" | 942 "punpcklbw %%mm7,%%mm1\n" |
943 "punpckhbw %%mm7,%%mm2\n" | 943 "punpckhbw %%mm7,%%mm2\n" |
944 "punpckhbw %%mm7,%%mm3\n" | 944 "punpckhbw %%mm7,%%mm3\n" |
945 "psubw %%mm1, %%mm0\n" | 945 "psubw %%mm1, %%mm0\n" |
946 "psubw %%mm3, %%mm2\n" | 946 "psubw %%mm3, %%mm2\n" |
947 | 947 |
948 "add %2,%0\n" | 948 "add %2,%0\n" |
949 | 949 |
950 "movq (%0),%%mm4\n" | 950 "movq (%0),%%mm4\n" |
951 "movq %%mm4, %%mm1\n" | 951 "movq %%mm4, %%mm1\n" |
952 "psllq $8, %%mm4\n" | 952 "psllq $8, %%mm4\n" |
953 "psrlq $8, %%mm1\n" | 953 "psrlq $8, %%mm1\n" |
954 "psrlq $8, %%mm4\n" | 954 "psrlq $8, %%mm4\n" |
966 "pxor %%mm1, %%mm1\n" | 966 "pxor %%mm1, %%mm1\n" |
967 "pcmpgtw %%mm0, %%mm3\n\t" | 967 "pcmpgtw %%mm0, %%mm3\n\t" |
968 "pcmpgtw %%mm2, %%mm1\n\t" | 968 "pcmpgtw %%mm2, %%mm1\n\t" |
969 "pxor %%mm3, %%mm0\n" | 969 "pxor %%mm3, %%mm0\n" |
970 "pxor %%mm1, %%mm2\n" | 970 "pxor %%mm1, %%mm2\n" |
971 "psubw %%mm3, %%mm0\n" | 971 "psubw %%mm3, %%mm0\n" |
972 "psubw %%mm1, %%mm2\n" | 972 "psubw %%mm1, %%mm2\n" |
973 "paddw %%mm0, %%mm2\n" | 973 "paddw %%mm0, %%mm2\n" |
974 "paddw %%mm2, %%mm6\n" | 974 "paddw %%mm2, %%mm6\n" |
975 | 975 |
976 "add %2,%0\n" | 976 "add %2,%0\n" |
977 "1:\n" | 977 "1:\n" |
978 | 978 |
979 "movq (%0),%%mm0\n" | 979 "movq (%0),%%mm0\n" |
980 "movq %%mm0, %%mm1\n" | 980 "movq %%mm0, %%mm1\n" |
981 "psllq $8, %%mm0\n" | 981 "psllq $8, %%mm0\n" |
982 "psrlq $8, %%mm1\n" | 982 "psrlq $8, %%mm1\n" |
983 "psrlq $8, %%mm0\n" | 983 "psrlq $8, %%mm0\n" |
995 "pxor %%mm1, %%mm1\n" | 995 "pxor %%mm1, %%mm1\n" |
996 "pcmpgtw %%mm4, %%mm3\n\t" | 996 "pcmpgtw %%mm4, %%mm3\n\t" |
997 "pcmpgtw %%mm5, %%mm1\n\t" | 997 "pcmpgtw %%mm5, %%mm1\n\t" |
998 "pxor %%mm3, %%mm4\n" | 998 "pxor %%mm3, %%mm4\n" |
999 "pxor %%mm1, %%mm5\n" | 999 "pxor %%mm1, %%mm5\n" |
1000 "psubw %%mm3, %%mm4\n" | 1000 "psubw %%mm3, %%mm4\n" |
1001 "psubw %%mm1, %%mm5\n" | 1001 "psubw %%mm1, %%mm5\n" |
1002 "paddw %%mm4, %%mm5\n" | 1002 "paddw %%mm4, %%mm5\n" |
1003 "paddw %%mm5, %%mm6\n" | 1003 "paddw %%mm5, %%mm6\n" |
1004 | 1004 |
1005 "add %2,%0\n" | 1005 "add %2,%0\n" |
1006 | 1006 |
1007 "movq (%0),%%mm4\n" | 1007 "movq (%0),%%mm4\n" |
1008 "movq %%mm4, %%mm1\n" | 1008 "movq %%mm4, %%mm1\n" |
1009 "psllq $8, %%mm4\n" | 1009 "psllq $8, %%mm4\n" |
1010 "psrlq $8, %%mm1\n" | 1010 "psrlq $8, %%mm1\n" |
1011 "psrlq $8, %%mm4\n" | 1011 "psrlq $8, %%mm4\n" |
1023 "pxor %%mm1, %%mm1\n" | 1023 "pxor %%mm1, %%mm1\n" |
1024 "pcmpgtw %%mm0, %%mm3\n\t" | 1024 "pcmpgtw %%mm0, %%mm3\n\t" |
1025 "pcmpgtw %%mm2, %%mm1\n\t" | 1025 "pcmpgtw %%mm2, %%mm1\n\t" |
1026 "pxor %%mm3, %%mm0\n" | 1026 "pxor %%mm3, %%mm0\n" |
1027 "pxor %%mm1, %%mm2\n" | 1027 "pxor %%mm1, %%mm2\n" |
1028 "psubw %%mm3, %%mm0\n" | 1028 "psubw %%mm3, %%mm0\n" |
1029 "psubw %%mm1, %%mm2\n" | 1029 "psubw %%mm1, %%mm2\n" |
1030 "paddw %%mm0, %%mm2\n" | 1030 "paddw %%mm0, %%mm2\n" |
1031 "paddw %%mm2, %%mm6\n" | 1031 "paddw %%mm2, %%mm6\n" |
1032 | 1032 |
1033 "add %2,%0\n" | 1033 "add %2,%0\n" |
1036 | 1036 |
1037 "movq %%mm6, %%mm0\n" | 1037 "movq %%mm6, %%mm0\n" |
1038 "punpcklwd %%mm7,%%mm0\n" | 1038 "punpcklwd %%mm7,%%mm0\n" |
1039 "punpckhwd %%mm7,%%mm6\n" | 1039 "punpckhwd %%mm7,%%mm6\n" |
1040 "paddd %%mm0, %%mm6\n" | 1040 "paddd %%mm0, %%mm6\n" |
1041 | 1041 |
1042 "movq %%mm6,%%mm0\n" | 1042 "movq %%mm6,%%mm0\n" |
1043 "psrlq $32, %%mm6\n" | 1043 "psrlq $32, %%mm6\n" |
1044 "paddd %%mm6,%%mm0\n" | 1044 "paddd %%mm6,%%mm0\n" |
1045 "movd %%mm0,%1\n" | 1045 "movd %%mm0,%1\n" |
1046 : "+r" (pix1), "=r"(tmp) | 1046 : "+r" (pix1), "=r"(tmp) |
1047 : "r" ((long)line_size) , "g" (h-2) | 1047 : "r" ((long)line_size) , "g" (h-2) |
1048 : "%ecx"); | 1048 : "%ecx"); |
1049 return tmp; | 1049 return tmp; |
1050 } | 1050 } |
1051 | 1051 |
1054 uint8_t * pix= pix1; | 1054 uint8_t * pix= pix1; |
1055 asm volatile ( | 1055 asm volatile ( |
1056 "movl %3,%%ecx\n" | 1056 "movl %3,%%ecx\n" |
1057 "pxor %%mm7,%%mm7\n" | 1057 "pxor %%mm7,%%mm7\n" |
1058 "pxor %%mm6,%%mm6\n" | 1058 "pxor %%mm6,%%mm6\n" |
1059 | 1059 |
1060 "movq (%0),%%mm0\n" | 1060 "movq (%0),%%mm0\n" |
1061 "movq 1(%0),%%mm1\n" | 1061 "movq 1(%0),%%mm1\n" |
1062 "movq %%mm0, %%mm2\n" | 1062 "movq %%mm0, %%mm2\n" |
1063 "movq %%mm1, %%mm3\n" | 1063 "movq %%mm1, %%mm3\n" |
1064 "punpcklbw %%mm7,%%mm0\n" | 1064 "punpcklbw %%mm7,%%mm0\n" |
1065 "punpcklbw %%mm7,%%mm1\n" | 1065 "punpcklbw %%mm7,%%mm1\n" |
1066 "punpckhbw %%mm7,%%mm2\n" | 1066 "punpckhbw %%mm7,%%mm2\n" |
1067 "punpckhbw %%mm7,%%mm3\n" | 1067 "punpckhbw %%mm7,%%mm3\n" |
1068 "psubw %%mm1, %%mm0\n" | 1068 "psubw %%mm1, %%mm0\n" |
1069 "psubw %%mm3, %%mm2\n" | 1069 "psubw %%mm3, %%mm2\n" |
1070 | 1070 |
1071 "add %2,%0\n" | 1071 "add %2,%0\n" |
1072 | 1072 |
1073 "movq (%0),%%mm4\n" | 1073 "movq (%0),%%mm4\n" |
1074 "movq 1(%0),%%mm1\n" | 1074 "movq 1(%0),%%mm1\n" |
1075 "movq %%mm4, %%mm5\n" | 1075 "movq %%mm4, %%mm5\n" |
1076 "movq %%mm1, %%mm3\n" | 1076 "movq %%mm1, %%mm3\n" |
1077 "punpcklbw %%mm7,%%mm4\n" | 1077 "punpcklbw %%mm7,%%mm4\n" |
1086 "pxor %%mm1, %%mm1\n" | 1086 "pxor %%mm1, %%mm1\n" |
1087 "pcmpgtw %%mm0, %%mm3\n\t" | 1087 "pcmpgtw %%mm0, %%mm3\n\t" |
1088 "pcmpgtw %%mm2, %%mm1\n\t" | 1088 "pcmpgtw %%mm2, %%mm1\n\t" |
1089 "pxor %%mm3, %%mm0\n" | 1089 "pxor %%mm3, %%mm0\n" |
1090 "pxor %%mm1, %%mm2\n" | 1090 "pxor %%mm1, %%mm2\n" |
1091 "psubw %%mm3, %%mm0\n" | 1091 "psubw %%mm3, %%mm0\n" |
1092 "psubw %%mm1, %%mm2\n" | 1092 "psubw %%mm1, %%mm2\n" |
1093 "paddw %%mm0, %%mm2\n" | 1093 "paddw %%mm0, %%mm2\n" |
1094 "paddw %%mm2, %%mm6\n" | 1094 "paddw %%mm2, %%mm6\n" |
1095 | 1095 |
1096 "add %2,%0\n" | 1096 "add %2,%0\n" |
1097 "1:\n" | 1097 "1:\n" |
1098 | 1098 |
1099 "movq (%0),%%mm0\n" | 1099 "movq (%0),%%mm0\n" |
1100 "movq 1(%0),%%mm1\n" | 1100 "movq 1(%0),%%mm1\n" |
1101 "movq %%mm0, %%mm2\n" | 1101 "movq %%mm0, %%mm2\n" |
1102 "movq %%mm1, %%mm3\n" | 1102 "movq %%mm1, %%mm3\n" |
1103 "punpcklbw %%mm7,%%mm0\n" | 1103 "punpcklbw %%mm7,%%mm0\n" |
1116 "pxor %%mm1, %%mm5\n" | 1116 "pxor %%mm1, %%mm5\n" |
1117 "psubw %%mm3, %%mm4\n" | 1117 "psubw %%mm3, %%mm4\n" |
1118 "psubw %%mm1, %%mm5\n" | 1118 "psubw %%mm1, %%mm5\n" |
1119 "paddw %%mm4, %%mm5\n" | 1119 "paddw %%mm4, %%mm5\n" |
1120 "paddw %%mm5, %%mm6\n" | 1120 "paddw %%mm5, %%mm6\n" |
1121 | 1121 |
1122 "add %2,%0\n" | 1122 "add %2,%0\n" |
1123 | 1123 |
1124 "movq (%0),%%mm4\n" | 1124 "movq (%0),%%mm4\n" |
1125 "movq 1(%0),%%mm1\n" | 1125 "movq 1(%0),%%mm1\n" |
1126 "movq %%mm4, %%mm5\n" | 1126 "movq %%mm4, %%mm5\n" |
1127 "movq %%mm1, %%mm3\n" | 1127 "movq %%mm1, %%mm3\n" |
1128 "punpcklbw %%mm7,%%mm4\n" | 1128 "punpcklbw %%mm7,%%mm4\n" |
1137 "pxor %%mm1, %%mm1\n" | 1137 "pxor %%mm1, %%mm1\n" |
1138 "pcmpgtw %%mm0, %%mm3\n\t" | 1138 "pcmpgtw %%mm0, %%mm3\n\t" |
1139 "pcmpgtw %%mm2, %%mm1\n\t" | 1139 "pcmpgtw %%mm2, %%mm1\n\t" |
1140 "pxor %%mm3, %%mm0\n" | 1140 "pxor %%mm3, %%mm0\n" |
1141 "pxor %%mm1, %%mm2\n" | 1141 "pxor %%mm1, %%mm2\n" |
1142 "psubw %%mm3, %%mm0\n" | 1142 "psubw %%mm3, %%mm0\n" |
1143 "psubw %%mm1, %%mm2\n" | 1143 "psubw %%mm1, %%mm2\n" |
1144 "paddw %%mm0, %%mm2\n" | 1144 "paddw %%mm0, %%mm2\n" |
1145 "paddw %%mm2, %%mm6\n" | 1145 "paddw %%mm2, %%mm6\n" |
1146 | 1146 |
1147 "add %2,%0\n" | 1147 "add %2,%0\n" |
1150 | 1150 |
1151 "movq %%mm6, %%mm0\n" | 1151 "movq %%mm6, %%mm0\n" |
1152 "punpcklwd %%mm7,%%mm0\n" | 1152 "punpcklwd %%mm7,%%mm0\n" |
1153 "punpckhwd %%mm7,%%mm6\n" | 1153 "punpckhwd %%mm7,%%mm6\n" |
1154 "paddd %%mm0, %%mm6\n" | 1154 "paddd %%mm0, %%mm6\n" |
1155 | 1155 |
1156 "movq %%mm6,%%mm0\n" | 1156 "movq %%mm6,%%mm0\n" |
1157 "psrlq $32, %%mm6\n" | 1157 "psrlq $32, %%mm6\n" |
1158 "paddd %%mm6,%%mm0\n" | 1158 "paddd %%mm6,%%mm0\n" |
1159 "movd %%mm0,%1\n" | 1159 "movd %%mm0,%1\n" |
1160 : "+r" (pix1), "=r"(tmp) | 1160 : "+r" (pix1), "=r"(tmp) |
1161 : "r" ((long)line_size) , "g" (h-2) | 1161 : "r" ((long)line_size) , "g" (h-2) |
1162 : "%ecx"); | 1162 : "%ecx"); |
1163 return tmp + hf_noise8_mmx(pix+8, line_size, h); | 1163 return tmp + hf_noise8_mmx(pix+8, line_size, h); |
1164 } | 1164 } |
1165 | 1165 |
1184 else return score1 + ABS(score2)*8; | 1184 else return score1 + ABS(score2)*8; |
1185 } | 1185 } |
1186 | 1186 |
1187 static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { | 1187 static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { |
1188 int tmp; | 1188 int tmp; |
1189 | 1189 |
1190 assert( (((int)pix) & 7) == 0); | 1190 assert( (((int)pix) & 7) == 0); |
1191 assert((line_size &7) ==0); | 1191 assert((line_size &7) ==0); |
1192 | 1192 |
1193 #define SUM(in0, in1, out0, out1) \ | 1193 #define SUM(in0, in1, out0, out1) \ |
1194 "movq (%0), %%mm2\n"\ | 1194 "movq (%0), %%mm2\n"\ |
1195 "movq 8(%0), %%mm3\n"\ | 1195 "movq 8(%0), %%mm3\n"\ |
1196 "add %2,%0\n"\ | 1196 "add %2,%0\n"\ |
1197 "movq %%mm2, " #out0 "\n"\ | 1197 "movq %%mm2, " #out0 "\n"\ |
1211 "paddw " #in1 ", " #in0 "\n"\ | 1211 "paddw " #in1 ", " #in0 "\n"\ |
1212 "paddw %%mm3, %%mm2\n"\ | 1212 "paddw %%mm3, %%mm2\n"\ |
1213 "paddw %%mm2, " #in0 "\n"\ | 1213 "paddw %%mm2, " #in0 "\n"\ |
1214 "paddw " #in0 ", %%mm6\n" | 1214 "paddw " #in0 ", %%mm6\n" |
1215 | 1215 |
1216 | 1216 |
1217 asm volatile ( | 1217 asm volatile ( |
1218 "movl %3,%%ecx\n" | 1218 "movl %3,%%ecx\n" |
1219 "pxor %%mm6,%%mm6\n" | 1219 "pxor %%mm6,%%mm6\n" |
1220 "pxor %%mm7,%%mm7\n" | 1220 "pxor %%mm7,%%mm7\n" |
1221 "movq (%0),%%mm0\n" | 1221 "movq (%0),%%mm0\n" |
1222 "movq 8(%0),%%mm1\n" | 1222 "movq 8(%0),%%mm1\n" |
1223 "add %2,%0\n" | 1223 "add %2,%0\n" |
1224 "subl $2, %%ecx\n" | 1224 "subl $2, %%ecx\n" |
1225 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1225 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1226 "1:\n" | 1226 "1:\n" |
1227 | 1227 |
1228 SUM(%%mm4, %%mm5, %%mm0, %%mm1) | 1228 SUM(%%mm4, %%mm5, %%mm0, %%mm1) |
1229 | 1229 |
1230 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1230 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1231 | 1231 |
1232 "subl $2, %%ecx\n" | 1232 "subl $2, %%ecx\n" |
1233 "jnz 1b\n" | 1233 "jnz 1b\n" |
1234 | 1234 |
1235 "movq %%mm6,%%mm0\n" | 1235 "movq %%mm6,%%mm0\n" |
1236 "psrlq $32, %%mm6\n" | 1236 "psrlq $32, %%mm6\n" |
1237 "paddw %%mm6,%%mm0\n" | 1237 "paddw %%mm6,%%mm0\n" |
1238 "movq %%mm0,%%mm6\n" | 1238 "movq %%mm0,%%mm6\n" |
1239 "psrlq $16, %%mm0\n" | 1239 "psrlq $16, %%mm0\n" |
1240 "paddw %%mm6,%%mm0\n" | 1240 "paddw %%mm6,%%mm0\n" |
1241 "movd %%mm0,%1\n" | 1241 "movd %%mm0,%1\n" |
1242 : "+r" (pix), "=r"(tmp) | 1242 : "+r" (pix), "=r"(tmp) |
1243 : "r" ((long)line_size) , "m" (h) | 1243 : "r" ((long)line_size) , "m" (h) |
1244 : "%ecx"); | 1244 : "%ecx"); |
1245 return tmp & 0xFFFF; | 1245 return tmp & 0xFFFF; |
1246 } | 1246 } |
1247 #undef SUM | 1247 #undef SUM |
1248 | 1248 |
1249 static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { | 1249 static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { |
1250 int tmp; | 1250 int tmp; |
1251 | 1251 |
1252 assert( (((int)pix) & 7) == 0); | 1252 assert( (((int)pix) & 7) == 0); |
1253 assert((line_size &7) ==0); | 1253 assert((line_size &7) ==0); |
1254 | 1254 |
1255 #define SUM(in0, in1, out0, out1) \ | 1255 #define SUM(in0, in1, out0, out1) \ |
1256 "movq (%0), " #out0 "\n"\ | 1256 "movq (%0), " #out0 "\n"\ |
1257 "movq 8(%0), " #out1 "\n"\ | 1257 "movq 8(%0), " #out1 "\n"\ |
1258 "add %2,%0\n"\ | 1258 "add %2,%0\n"\ |
1259 "psadbw " #out0 ", " #in0 "\n"\ | 1259 "psadbw " #out0 ", " #in0 "\n"\ |
1269 "movq 8(%0),%%mm1\n" | 1269 "movq 8(%0),%%mm1\n" |
1270 "add %2,%0\n" | 1270 "add %2,%0\n" |
1271 "subl $2, %%ecx\n" | 1271 "subl $2, %%ecx\n" |
1272 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1272 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1273 "1:\n" | 1273 "1:\n" |
1274 | 1274 |
1275 SUM(%%mm4, %%mm5, %%mm0, %%mm1) | 1275 SUM(%%mm4, %%mm5, %%mm0, %%mm1) |
1276 | 1276 |
1277 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1277 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1278 | 1278 |
1279 "subl $2, %%ecx\n" | 1279 "subl $2, %%ecx\n" |
1280 "jnz 1b\n" | 1280 "jnz 1b\n" |
1281 | 1281 |
1282 "movd %%mm6,%1\n" | 1282 "movd %%mm6,%1\n" |
1283 : "+r" (pix), "=r"(tmp) | 1283 : "+r" (pix), "=r"(tmp) |
1284 : "r" ((long)line_size) , "m" (h) | 1284 : "r" ((long)line_size) , "m" (h) |
1285 : "%ecx"); | 1285 : "%ecx"); |
1286 return tmp; | 1286 return tmp; |
1287 } | 1287 } |
1288 #undef SUM | 1288 #undef SUM |
1289 | 1289 |
1290 static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | 1290 static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { |
1291 int tmp; | 1291 int tmp; |
1292 | 1292 |
1293 assert( (((int)pix1) & 7) == 0); | 1293 assert( (((int)pix1) & 7) == 0); |
1294 assert( (((int)pix2) & 7) == 0); | 1294 assert( (((int)pix2) & 7) == 0); |
1295 assert((line_size &7) ==0); | 1295 assert((line_size &7) ==0); |
1296 | 1296 |
1297 #define SUM(in0, in1, out0, out1) \ | 1297 #define SUM(in0, in1, out0, out1) \ |
1298 "movq (%0),%%mm2\n"\ | 1298 "movq (%0),%%mm2\n"\ |
1299 "movq (%1)," #out0 "\n"\ | 1299 "movq (%1)," #out0 "\n"\ |
1300 "movq 8(%0),%%mm3\n"\ | 1300 "movq 8(%0),%%mm3\n"\ |
1301 "movq 8(%1)," #out1 "\n"\ | 1301 "movq 8(%1)," #out1 "\n"\ |
1322 "paddw " #in1 ", " #in0 "\n"\ | 1322 "paddw " #in1 ", " #in0 "\n"\ |
1323 "paddw %%mm3, %%mm2\n"\ | 1323 "paddw %%mm3, %%mm2\n"\ |
1324 "paddw %%mm2, " #in0 "\n"\ | 1324 "paddw %%mm2, " #in0 "\n"\ |
1325 "paddw " #in0 ", %%mm6\n" | 1325 "paddw " #in0 ", %%mm6\n" |
1326 | 1326 |
1327 | 1327 |
1328 asm volatile ( | 1328 asm volatile ( |
1329 "movl %4,%%ecx\n" | 1329 "movl %4,%%ecx\n" |
1330 "pxor %%mm6,%%mm6\n" | 1330 "pxor %%mm6,%%mm6\n" |
1331 "pcmpeqw %%mm7,%%mm7\n" | 1331 "pcmpeqw %%mm7,%%mm7\n" |
1332 "psllw $15, %%mm7\n" | 1332 "psllw $15, %%mm7\n" |
1342 "psubb %%mm3, %%mm1\n" | 1342 "psubb %%mm3, %%mm1\n" |
1343 "pxor %%mm7, %%mm0\n" | 1343 "pxor %%mm7, %%mm0\n" |
1344 "pxor %%mm7, %%mm1\n" | 1344 "pxor %%mm7, %%mm1\n" |
1345 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1345 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1346 "1:\n" | 1346 "1:\n" |
1347 | 1347 |
1348 SUM(%%mm4, %%mm5, %%mm0, %%mm1) | 1348 SUM(%%mm4, %%mm5, %%mm0, %%mm1) |
1349 | 1349 |
1350 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1350 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1351 | 1351 |
1352 "subl $2, %%ecx\n" | 1352 "subl $2, %%ecx\n" |
1353 "jnz 1b\n" | 1353 "jnz 1b\n" |
1354 | 1354 |
1355 "movq %%mm6,%%mm0\n" | 1355 "movq %%mm6,%%mm0\n" |
1356 "psrlq $32, %%mm6\n" | 1356 "psrlq $32, %%mm6\n" |
1357 "paddw %%mm6,%%mm0\n" | 1357 "paddw %%mm6,%%mm0\n" |
1358 "movq %%mm0,%%mm6\n" | 1358 "movq %%mm0,%%mm6\n" |
1359 "psrlq $16, %%mm0\n" | 1359 "psrlq $16, %%mm0\n" |
1360 "paddw %%mm6,%%mm0\n" | 1360 "paddw %%mm6,%%mm0\n" |
1361 "movd %%mm0,%2\n" | 1361 "movd %%mm0,%2\n" |
1362 : "+r" (pix1), "+r" (pix2), "=r"(tmp) | 1362 : "+r" (pix1), "+r" (pix2), "=r"(tmp) |
1363 : "r" ((long)line_size) , "m" (h) | 1363 : "r" ((long)line_size) , "m" (h) |
1364 : "%ecx"); | 1364 : "%ecx"); |
1365 return tmp & 0x7FFF; | 1365 return tmp & 0x7FFF; |
1366 } | 1366 } |
1367 #undef SUM | 1367 #undef SUM |
1368 | 1368 |
1369 static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { | 1369 static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { |
1370 int tmp; | 1370 int tmp; |
1371 | 1371 |
1372 assert( (((int)pix1) & 7) == 0); | 1372 assert( (((int)pix1) & 7) == 0); |
1373 assert( (((int)pix2) & 7) == 0); | 1373 assert( (((int)pix2) & 7) == 0); |
1374 assert((line_size &7) ==0); | 1374 assert((line_size &7) ==0); |
1375 | 1375 |
1376 #define SUM(in0, in1, out0, out1) \ | 1376 #define SUM(in0, in1, out0, out1) \ |
1377 "movq (%0)," #out0 "\n"\ | 1377 "movq (%0)," #out0 "\n"\ |
1378 "movq (%1),%%mm2\n"\ | 1378 "movq (%1),%%mm2\n"\ |
1379 "movq 8(%0)," #out1 "\n"\ | 1379 "movq 8(%0)," #out1 "\n"\ |
1380 "movq 8(%1),%%mm3\n"\ | 1380 "movq 8(%1),%%mm3\n"\ |
1406 "psubb %%mm3, %%mm1\n" | 1406 "psubb %%mm3, %%mm1\n" |
1407 "pxor %%mm7, %%mm0\n" | 1407 "pxor %%mm7, %%mm0\n" |
1408 "pxor %%mm7, %%mm1\n" | 1408 "pxor %%mm7, %%mm1\n" |
1409 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1409 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1410 "1:\n" | 1410 "1:\n" |
1411 | 1411 |
1412 SUM(%%mm4, %%mm5, %%mm0, %%mm1) | 1412 SUM(%%mm4, %%mm5, %%mm0, %%mm1) |
1413 | 1413 |
1414 SUM(%%mm0, %%mm1, %%mm4, %%mm5) | 1414 SUM(%%mm0, %%mm1, %%mm4, %%mm5) |
1415 | 1415 |
1416 "subl $2, %%ecx\n" | 1416 "subl $2, %%ecx\n" |
1417 "jnz 1b\n" | 1417 "jnz 1b\n" |
1418 | 1418 |
1419 "movd %%mm6,%2\n" | 1419 "movd %%mm6,%2\n" |
1420 : "+r" (pix1), "+r" (pix2), "=r"(tmp) | 1420 : "+r" (pix1), "+r" (pix2), "=r"(tmp) |
1421 : "r" ((long)line_size) , "m" (h) | 1421 : "r" ((long)line_size) , "m" (h) |
1422 : "%ecx"); | 1422 : "%ecx"); |
1423 return tmp; | 1423 return tmp; |
1424 } | 1424 } |
1425 #undef SUM | 1425 #undef SUM |
1447 } | 1447 } |
1448 | 1448 |
1449 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ | 1449 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ |
1450 long i=0; | 1450 long i=0; |
1451 uint8_t l, lt; | 1451 uint8_t l, lt; |
1452 | 1452 |
1453 asm volatile( | 1453 asm volatile( |
1454 "1: \n\t" | 1454 "1: \n\t" |
1455 "movq -1(%1, %0), %%mm0 \n\t" // LT | 1455 "movq -1(%1, %0), %%mm0 \n\t" // LT |
1456 "movq (%1, %0), %%mm1 \n\t" // T | 1456 "movq (%1, %0), %%mm1 \n\t" // T |
1457 "movq -1(%2, %0), %%mm2 \n\t" // L | 1457 "movq -1(%2, %0), %%mm2 \n\t" // L |
1460 "psubb %%mm0, %%mm2 \n\t" | 1460 "psubb %%mm0, %%mm2 \n\t" |
1461 "paddb %%mm1, %%mm2 \n\t" // L + T - LT | 1461 "paddb %%mm1, %%mm2 \n\t" // L + T - LT |
1462 "movq %%mm4, %%mm5 \n\t" // L | 1462 "movq %%mm4, %%mm5 \n\t" // L |
1463 "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) | 1463 "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) |
1464 "pminub %%mm5, %%mm1 \n\t" // min(T, L) | 1464 "pminub %%mm5, %%mm1 \n\t" // min(T, L) |
1465 "pminub %%mm2, %%mm4 \n\t" | 1465 "pminub %%mm2, %%mm4 \n\t" |
1466 "pmaxub %%mm1, %%mm4 \n\t" | 1466 "pmaxub %%mm1, %%mm4 \n\t" |
1467 "psubb %%mm4, %%mm3 \n\t" // dst - pred | 1467 "psubb %%mm4, %%mm3 \n\t" // dst - pred |
1468 "movq %%mm3, (%3, %0) \n\t" | 1468 "movq %%mm3, (%3, %0) \n\t" |
1469 "add $8, %0 \n\t" | 1469 "add $8, %0 \n\t" |
1470 "cmp %4, %0 \n\t" | 1470 "cmp %4, %0 \n\t" |
1473 : "r"(src1), "r"(src2), "r"(dst), "r"((long)w) | 1473 : "r"(src1), "r"(src2), "r"(dst), "r"((long)w) |
1474 ); | 1474 ); |
1475 | 1475 |
1476 l= *left; | 1476 l= *left; |
1477 lt= *left_top; | 1477 lt= *left_top; |
1478 | 1478 |
1479 dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF); | 1479 dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF); |
1480 | 1480 |
1481 *left_top= src1[w-1]; | 1481 *left_top= src1[w-1]; |
1482 *left = src2[w-1]; | 1482 *left = src2[w-1]; |
1483 } | 1483 } |
1484 | 1484 |
1485 #define LBUTTERFLY2(a1,b1,a2,b2)\ | 1485 #define LBUTTERFLY2(a1,b1,a2,b2)\ |
1519 #define MMABS_SUM_MMX2(a,z, sum)\ | 1519 #define MMABS_SUM_MMX2(a,z, sum)\ |
1520 "pxor " #z ", " #z " \n\t"\ | 1520 "pxor " #z ", " #z " \n\t"\ |
1521 "psubw " #a ", " #z " \n\t"\ | 1521 "psubw " #a ", " #z " \n\t"\ |
1522 "pmaxsw " #z ", " #a " \n\t"\ | 1522 "pmaxsw " #z ", " #a " \n\t"\ |
1523 "paddusw " #a ", " #sum " \n\t" | 1523 "paddusw " #a ", " #sum " \n\t" |
1524 | 1524 |
1525 #define SBUTTERFLY(a,b,t,n)\ | 1525 #define SBUTTERFLY(a,b,t,n)\ |
1526 "movq " #a ", " #t " \n\t" /* abcd */\ | 1526 "movq " #a ", " #t " \n\t" /* abcd */\ |
1527 "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ | 1527 "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ |
1528 "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ | 1528 "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ |
1529 | 1529 |
1546 "movq "#d", "#o"+48(%1) \n\t"\ | 1546 "movq "#d", "#o"+48(%1) \n\t"\ |
1547 | 1547 |
1548 static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ | 1548 static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1549 uint64_t temp[16] __align8; | 1549 uint64_t temp[16] __align8; |
1550 int sum=0; | 1550 int sum=0; |
1551 | 1551 |
1552 assert(h==8); | 1552 assert(h==8); |
1553 | 1553 |
1554 diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); | 1554 diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); |
1555 | 1555 |
1556 asm volatile( | 1556 asm volatile( |
1557 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) | 1557 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) |
1558 LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7) | 1558 LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7) |
1559 | 1559 |
1560 HADAMARD48 | 1560 HADAMARD48 |
1561 | 1561 |
1562 "movq %%mm7, 112(%1) \n\t" | 1562 "movq %%mm7, 112(%1) \n\t" |
1563 | 1563 |
1564 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) | 1564 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) |
1565 STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2) | 1565 STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2) |
1566 | 1566 |
1567 "movq 112(%1), %%mm7 \n\t" | 1567 "movq 112(%1), %%mm7 \n\t" |
1568 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) | 1568 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) |
1569 STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6) | 1569 STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6) |
1570 | 1570 |
1571 LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3) | 1571 LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3) |
1572 LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) | 1572 LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) |
1573 | 1573 |
1574 HADAMARD48 | 1574 HADAMARD48 |
1575 | 1575 |
1576 "movq %%mm7, 120(%1) \n\t" | 1576 "movq %%mm7, 120(%1) \n\t" |
1577 | 1577 |
1578 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) | 1578 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) |
1579 STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2) | 1579 STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2) |
1580 | 1580 |
1581 "movq 120(%1), %%mm7 \n\t" | 1581 "movq 120(%1), %%mm7 \n\t" |
1582 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) | 1582 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) |
1583 "movq %%mm7, %%mm5 \n\t"//FIXME remove | 1583 "movq %%mm7, %%mm5 \n\t"//FIXME remove |
1584 "movq %%mm6, %%mm7 \n\t" | 1584 "movq %%mm6, %%mm7 \n\t" |
1585 "movq %%mm0, %%mm6 \n\t" | 1585 "movq %%mm0, %%mm6 \n\t" |
1586 // STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove | 1586 // STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove |
1587 | 1587 |
1588 LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3) | 1588 LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3) |
1589 // LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) | 1589 // LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) |
1590 | 1590 |
1591 HADAMARD48 | 1591 HADAMARD48 |
1592 "movq %%mm7, 64(%1) \n\t" | 1592 "movq %%mm7, 64(%1) \n\t" |
1593 MMABS(%%mm0, %%mm7) | 1593 MMABS(%%mm0, %%mm7) |
1594 MMABS_SUM(%%mm1, %%mm7, %%mm0) | 1594 MMABS_SUM(%%mm1, %%mm7, %%mm0) |
1595 MMABS_SUM(%%mm2, %%mm7, %%mm0) | 1595 MMABS_SUM(%%mm2, %%mm7, %%mm0) |
1598 MMABS_SUM(%%mm5, %%mm7, %%mm0) | 1598 MMABS_SUM(%%mm5, %%mm7, %%mm0) |
1599 MMABS_SUM(%%mm6, %%mm7, %%mm0) | 1599 MMABS_SUM(%%mm6, %%mm7, %%mm0) |
1600 "movq 64(%1), %%mm1 \n\t" | 1600 "movq 64(%1), %%mm1 \n\t" |
1601 MMABS_SUM(%%mm1, %%mm7, %%mm0) | 1601 MMABS_SUM(%%mm1, %%mm7, %%mm0) |
1602 "movq %%mm0, 64(%1) \n\t" | 1602 "movq %%mm0, 64(%1) \n\t" |
1603 | 1603 |
1604 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) | 1604 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) |
1605 LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7) | 1605 LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7) |
1606 | 1606 |
1607 HADAMARD48 | 1607 HADAMARD48 |
1608 "movq %%mm7, (%1) \n\t" | 1608 "movq %%mm7, (%1) \n\t" |
1609 MMABS(%%mm0, %%mm7) | 1609 MMABS(%%mm0, %%mm7) |
1610 MMABS_SUM(%%mm1, %%mm7, %%mm0) | 1610 MMABS_SUM(%%mm1, %%mm7, %%mm0) |
1611 MMABS_SUM(%%mm2, %%mm7, %%mm0) | 1611 MMABS_SUM(%%mm2, %%mm7, %%mm0) |
1615 MMABS_SUM(%%mm6, %%mm7, %%mm0) | 1615 MMABS_SUM(%%mm6, %%mm7, %%mm0) |
1616 "movq (%1), %%mm1 \n\t" | 1616 "movq (%1), %%mm1 \n\t" |
1617 MMABS_SUM(%%mm1, %%mm7, %%mm0) | 1617 MMABS_SUM(%%mm1, %%mm7, %%mm0) |
1618 "movq 64(%1), %%mm1 \n\t" | 1618 "movq 64(%1), %%mm1 \n\t" |
1619 MMABS_SUM(%%mm1, %%mm7, %%mm0) | 1619 MMABS_SUM(%%mm1, %%mm7, %%mm0) |
1620 | 1620 |
1621 "movq %%mm0, %%mm1 \n\t" | 1621 "movq %%mm0, %%mm1 \n\t" |
1622 "psrlq $32, %%mm0 \n\t" | 1622 "psrlq $32, %%mm0 \n\t" |
1623 "paddusw %%mm1, %%mm0 \n\t" | 1623 "paddusw %%mm1, %%mm0 \n\t" |
1624 "movq %%mm0, %%mm1 \n\t" | 1624 "movq %%mm0, %%mm1 \n\t" |
1625 "psrlq $16, %%mm0 \n\t" | 1625 "psrlq $16, %%mm0 \n\t" |
1626 "paddusw %%mm1, %%mm0 \n\t" | 1626 "paddusw %%mm1, %%mm0 \n\t" |
1627 "movd %%mm0, %0 \n\t" | 1627 "movd %%mm0, %0 \n\t" |
1628 | 1628 |
1629 : "=r" (sum) | 1629 : "=r" (sum) |
1630 : "r"(temp) | 1630 : "r"(temp) |
1631 ); | 1631 ); |
1632 return sum&0xFFFF; | 1632 return sum&0xFFFF; |
1633 } | 1633 } |
1634 | 1634 |
1635 static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ | 1635 static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1636 uint64_t temp[16] __align8; | 1636 uint64_t temp[16] __align8; |
1637 int sum=0; | 1637 int sum=0; |
1638 | 1638 |
1639 assert(h==8); | 1639 assert(h==8); |
1640 | 1640 |
1641 diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); | 1641 diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); |
1642 | 1642 |
1643 asm volatile( | 1643 asm volatile( |
1644 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) | 1644 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) |
1645 LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7) | 1645 LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7) |
1646 | 1646 |
1647 HADAMARD48 | 1647 HADAMARD48 |
1648 | 1648 |
1649 "movq %%mm7, 112(%1) \n\t" | 1649 "movq %%mm7, 112(%1) \n\t" |
1650 | 1650 |
1651 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) | 1651 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) |
1652 STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2) | 1652 STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2) |
1653 | 1653 |
1654 "movq 112(%1), %%mm7 \n\t" | 1654 "movq 112(%1), %%mm7 \n\t" |
1655 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) | 1655 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) |
1656 STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6) | 1656 STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6) |
1657 | 1657 |
1658 LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3) | 1658 LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3) |
1659 LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) | 1659 LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) |
1660 | 1660 |
1661 HADAMARD48 | 1661 HADAMARD48 |
1662 | 1662 |
1663 "movq %%mm7, 120(%1) \n\t" | 1663 "movq %%mm7, 120(%1) \n\t" |
1664 | 1664 |
1665 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) | 1665 TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) |
1666 STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2) | 1666 STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2) |
1667 | 1667 |
1668 "movq 120(%1), %%mm7 \n\t" | 1668 "movq 120(%1), %%mm7 \n\t" |
1669 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) | 1669 TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) |
1670 "movq %%mm7, %%mm5 \n\t"//FIXME remove | 1670 "movq %%mm7, %%mm5 \n\t"//FIXME remove |
1671 "movq %%mm6, %%mm7 \n\t" | 1671 "movq %%mm6, %%mm7 \n\t" |
1672 "movq %%mm0, %%mm6 \n\t" | 1672 "movq %%mm0, %%mm6 \n\t" |
1673 // STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove | 1673 // STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove |
1674 | 1674 |
1675 LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3) | 1675 LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3) |
1676 // LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) | 1676 // LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) |
1677 | 1677 |
1678 HADAMARD48 | 1678 HADAMARD48 |
1679 "movq %%mm7, 64(%1) \n\t" | 1679 "movq %%mm7, 64(%1) \n\t" |
1680 MMABS_MMX2(%%mm0, %%mm7) | 1680 MMABS_MMX2(%%mm0, %%mm7) |
1681 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) | 1681 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) |
1682 MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0) | 1682 MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0) |
1685 MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0) | 1685 MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0) |
1686 MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0) | 1686 MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0) |
1687 "movq 64(%1), %%mm1 \n\t" | 1687 "movq 64(%1), %%mm1 \n\t" |
1688 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) | 1688 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) |
1689 "movq %%mm0, 64(%1) \n\t" | 1689 "movq %%mm0, 64(%1) \n\t" |
1690 | 1690 |
1691 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) | 1691 LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) |
1692 LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7) | 1692 LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7) |
1693 | 1693 |
1694 HADAMARD48 | 1694 HADAMARD48 |
1695 "movq %%mm7, (%1) \n\t" | 1695 "movq %%mm7, (%1) \n\t" |
1696 MMABS_MMX2(%%mm0, %%mm7) | 1696 MMABS_MMX2(%%mm0, %%mm7) |
1697 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) | 1697 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) |
1698 MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0) | 1698 MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0) |
1702 MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0) | 1702 MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0) |
1703 "movq (%1), %%mm1 \n\t" | 1703 "movq (%1), %%mm1 \n\t" |
1704 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) | 1704 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) |
1705 "movq 64(%1), %%mm1 \n\t" | 1705 "movq 64(%1), %%mm1 \n\t" |
1706 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) | 1706 MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0) |
1707 | 1707 |
1708 "pshufw $0x0E, %%mm0, %%mm1 \n\t" | 1708 "pshufw $0x0E, %%mm0, %%mm1 \n\t" |
1709 "paddusw %%mm1, %%mm0 \n\t" | 1709 "paddusw %%mm1, %%mm0 \n\t" |
1710 "pshufw $0x01, %%mm0, %%mm1 \n\t" | 1710 "pshufw $0x01, %%mm0, %%mm1 \n\t" |
1711 "paddusw %%mm1, %%mm0 \n\t" | 1711 "paddusw %%mm1, %%mm0 \n\t" |
1712 "movd %%mm0, %0 \n\t" | 1712 "movd %%mm0, %0 \n\t" |
1713 | 1713 |
1714 : "=r" (sum) | 1714 : "=r" (sum) |
1715 : "r"(temp) | 1715 : "r"(temp) |
1716 ); | 1716 ); |
1717 return sum&0xFFFF; | 1717 return sum&0xFFFF; |
1718 } | 1718 } |
2403 c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ | 2403 c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ |
2404 c->avg_ ## postfix1 = avg_ ## postfix2; | 2404 c->avg_ ## postfix1 = avg_ ## postfix2; |
2405 | 2405 |
/* try_8x8basis_mmx(): MMX evaluation over a 64-coefficient block using
 * rem[], weight[] and a basis[] scaled by 'scale'; returns an int metric
 * accumulated in %%mm7 (NOTE(review): looks like the encoder's basis-search
 * cost function -- confirm against the C reference implementation).
 * NOTE(review): this excerpt is a two-column diff dump; original lines
 * 2426-2441 (the core of the per-16-coefficient loop body, including the
 * uses of weight[] via %3 and the loop branch) are missing, so only the
 * visible setup and reduction are documented. */
2406 static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ | 2406 static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
/* byte offset into the arrays, advanced inside the asm via "+r" (%0) */
2407 long i=0; | 2407 long i=0;
2408 | 2408
/* caller contract: |scale| < 256 so the shifted scale below still fits
 * the 16-bit operand broadcast into %%mm5 */
2409 assert(ABS(scale) < 256); | 2409 assert(ABS(scale) < 256);
2410 scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; | 2410 scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
2411 | 2411
2412 asm volatile( | 2412 asm volatile(
/* %%mm6 = 0x0001 in each word (rounding constant), %%mm7 = accumulator,
 * %%mm5 = scale broadcast to all four 16-bit lanes (movd + 2x punpcklwd) */
2413 "pcmpeqw %%mm6, %%mm6 \n\t" // -1w | 2413 "pcmpeqw %%mm6, %%mm6 \n\t" // -1w
2414 "psrlw $15, %%mm6 \n\t" // 1w | 2414 "psrlw $15, %%mm6 \n\t" // 1w
2415 "pxor %%mm7, %%mm7 \n\t" | 2415 "pxor %%mm7, %%mm7 \n\t"
2416 "movd %4, %%mm5 \n\t" | 2416 "movd %4, %%mm5 \n\t"
2417 "punpcklwd %%mm5, %%mm5 \n\t" | 2417 "punpcklwd %%mm5, %%mm5 \n\t"
2418 "punpcklwd %%mm5, %%mm5 \n\t" | 2418 "punpcklwd %%mm5, %%mm5 \n\t"
/* loop head: scale 8 basis coefficients (pmulhw keeps the high 16 bits
 * of the 32-bit product) and round by adding 1 per lane */
2419 "1: \n\t" | 2419 "1: \n\t"
2420 "movq (%1, %0), %%mm0 \n\t" | 2420 "movq (%1, %0), %%mm0 \n\t"
2421 "movq 8(%1, %0), %%mm1 \n\t" | 2421 "movq 8(%1, %0), %%mm1 \n\t"
2422 "pmulhw %%mm5, %%mm0 \n\t" | 2422 "pmulhw %%mm5, %%mm0 \n\t"
2423 "pmulhw %%mm5, %%mm1 \n\t" | 2423 "pmulhw %%mm5, %%mm1 \n\t"
2424 "paddw %%mm6, %%mm0 \n\t" | 2424 "paddw %%mm6, %%mm0 \n\t"
2425 "paddw %%mm6, %%mm1 \n\t" | 2425 "paddw %%mm6, %%mm1 \n\t"
/* NOTE(review): original lines 2426-2441 (rest of the loop body and the
 * backward branch to label 1) are not visible in this excerpt */
/* horizontal reduction of the two 32-bit halves of %%mm7, then >>2,
 * result returned through %0 */
2442 "movq %%mm7, %%mm6 \n\t" | 2442 "movq %%mm7, %%mm6 \n\t"
2443 "psrlq $32, %%mm7 \n\t" | 2443 "psrlq $32, %%mm7 \n\t"
2444 "paddd %%mm6, %%mm7 \n\t" | 2444 "paddd %%mm6, %%mm7 \n\t"
2445 "psrld $2, %%mm7 \n\t" | 2445 "psrld $2, %%mm7 \n\t"
2446 "movd %%mm7, %0 \n\t" | 2446 "movd %%mm7, %0 \n\t"
2447 | 2447
2448 : "+r" (i) | 2448 : "+r" (i)
2449 : "r"(basis), "r"(rem), "g"(scale) | 2449 : "r"(basis), "r"(rem), "g"(scale)
2450 ); | 2450 );
/* i was overwritten by the final movd, so this returns the reduced sum,
 * not the loop counter */
2451 return i; | 2451 return i;
2452 } | 2452 }
2453 | 2453 |
2454 static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ | 2454 static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ |
2455 long i=0; | 2455 long i=0; |
2456 | 2456 |
2457 if(ABS(scale) < 256){ | 2457 if(ABS(scale) < 256){ |
2458 scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; | 2458 scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; |
2459 asm volatile( | 2459 asm volatile( |
2460 "pcmpeqw %%mm6, %%mm6 \n\t" // -1w | 2460 "pcmpeqw %%mm6, %%mm6 \n\t" // -1w |
2461 "psrlw $15, %%mm6 \n\t" // 1w | 2461 "psrlw $15, %%mm6 \n\t" // 1w |
2462 "movd %3, %%mm5 \n\t" | 2462 "movd %3, %%mm5 \n\t" |
2463 "punpcklwd %%mm5, %%mm5 \n\t" | 2463 "punpcklwd %%mm5, %%mm5 \n\t" |
2464 "punpcklwd %%mm5, %%mm5 \n\t" | 2464 "punpcklwd %%mm5, %%mm5 \n\t" |
2465 "1: \n\t" | 2465 "1: \n\t" |
2466 "movq (%1, %0), %%mm0 \n\t" | 2466 "movq (%1, %0), %%mm0 \n\t" |
2467 "movq 8(%1, %0), %%mm1 \n\t" | 2467 "movq 8(%1, %0), %%mm1 \n\t" |
2468 "pmulhw %%mm5, %%mm0 \n\t" | 2468 "pmulhw %%mm5, %%mm0 \n\t" |
2469 "pmulhw %%mm5, %%mm1 \n\t" | 2469 "pmulhw %%mm5, %%mm1 \n\t" |
2470 "paddw %%mm6, %%mm0 \n\t" | 2470 "paddw %%mm6, %%mm0 \n\t" |
2471 "paddw %%mm6, %%mm1 \n\t" | 2471 "paddw %%mm6, %%mm1 \n\t" |
2472 "psraw $1, %%mm0 \n\t" | 2472 "psraw $1, %%mm0 \n\t" |
2473 "psraw $1, %%mm1 \n\t" | 2473 "psraw $1, %%mm1 \n\t" |
2474 "paddw (%2, %0), %%mm0 \n\t" | 2474 "paddw (%2, %0), %%mm0 \n\t" |
2475 "paddw 8(%2, %0), %%mm1 \n\t" | 2475 "paddw 8(%2, %0), %%mm1 \n\t" |
2476 "movq %%mm0, (%2, %0) \n\t" | 2476 "movq %%mm0, (%2, %0) \n\t" |
2477 "movq %%mm1, 8(%2, %0) \n\t" | 2477 "movq %%mm1, 8(%2, %0) \n\t" |
2478 "add $16, %0 \n\t" | 2478 "add $16, %0 \n\t" |
2479 "cmp $128, %0 \n\t" //FIXME optimize & bench | 2479 "cmp $128, %0 \n\t" //FIXME optimize & bench |
2480 " jb 1b \n\t" | 2480 " jb 1b \n\t" |
2481 | 2481 |
2482 : "+r" (i) | 2482 : "+r" (i) |
2483 : "r"(basis), "r"(rem), "g"(scale) | 2483 : "r"(basis), "r"(rem), "g"(scale) |
2484 ); | 2484 ); |
2485 }else{ | 2485 }else{ |
2486 for(i=0; i<8*8; i++){ | 2486 for(i=0; i<8*8; i++){ |
2487 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | 2487 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); |
2488 } | 2488 } |
2489 } | 2489 } |
2490 } | 2490 } |
2491 | 2491 |
2492 #include "h264dsp_mmx.c" | 2492 #include "h264dsp_mmx.c" |
2493 | 2493 |
2494 /* external functions, from idct_mmx.c */ | 2494 /* external functions, from idct_mmx.c */ |
2495 void ff_mmx_idct(DCTELEM *block); | 2495 void ff_mmx_idct(DCTELEM *block); |
2496 void ff_mmxext_idct(DCTELEM *block); | 2496 void ff_mmxext_idct(DCTELEM *block); |
2497 | 2497 |
2498 void ff_vp3_idct_sse2(int16_t *input_data); | 2498 void ff_vp3_idct_sse2(int16_t *input_data); |
2561 { | 2561 { |
2562 ff_idct_xvid_mmx2 (block); | 2562 ff_idct_xvid_mmx2 (block); |
2563 add_pixels_clamped_mmx(block, dest, line_size); | 2563 add_pixels_clamped_mmx(block, dest, line_size); |
2564 } | 2564 } |
2565 #endif | 2565 #endif |
2566 | 2566 |
2567 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | 2567 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
2568 { | 2568 { |
2569 mm_flags = mm_support(); | 2569 mm_flags = mm_support(); |
2570 | 2570 |
2571 if (avctx->dsp_mask) { | 2571 if (avctx->dsp_mask) { |
2699 | 2699 |
2700 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; | 2700 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; |
2701 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; | 2701 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; |
2702 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; | 2702 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; |
2703 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; | 2703 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; |
2704 | 2704 |
2705 c->add_bytes= add_bytes_mmx; | 2705 c->add_bytes= add_bytes_mmx; |
2706 #ifdef CONFIG_ENCODERS | 2706 #ifdef CONFIG_ENCODERS |
2707 c->diff_bytes= diff_bytes_mmx; | 2707 c->diff_bytes= diff_bytes_mmx; |
2708 | 2708 |
2709 c->hadamard8_diff[0]= hadamard8_diff16_mmx; | 2709 c->hadamard8_diff[0]= hadamard8_diff16_mmx; |
2710 c->hadamard8_diff[1]= hadamard8_diff_mmx; | 2710 c->hadamard8_diff[1]= hadamard8_diff_mmx; |
2711 | 2711 |
2712 c->pix_norm1 = pix_norm1_mmx; | 2712 c->pix_norm1 = pix_norm1_mmx; |
2713 c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx; | 2713 c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx; |
2714 c->sse[1] = sse8_mmx; | 2714 c->sse[1] = sse8_mmx; |
2715 c->vsad[4]= vsad_intra16_mmx; | 2715 c->vsad[4]= vsad_intra16_mmx; |
2716 | 2716 |
2717 c->nsse[0] = nsse16_mmx; | 2717 c->nsse[0] = nsse16_mmx; |
2718 c->nsse[1] = nsse8_mmx; | 2718 c->nsse[1] = nsse8_mmx; |
2719 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2719 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
2720 c->vsad[0] = vsad16_mmx; | 2720 c->vsad[0] = vsad16_mmx; |
2721 } | 2721 } |
2722 | 2722 |
2723 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2723 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
2724 c->try_8x8basis= try_8x8basis_mmx; | 2724 c->try_8x8basis= try_8x8basis_mmx; |
2725 } | 2725 } |
2726 c->add_8x8basis= add_8x8basis_mmx; | 2726 c->add_8x8basis= add_8x8basis_mmx; |
2727 | 2727 |
2728 #endif //CONFIG_ENCODERS | 2728 #endif //CONFIG_ENCODERS |
2729 | 2729 |
2730 c->h263_v_loop_filter= h263_v_loop_filter_mmx; | 2730 c->h263_v_loop_filter= h263_v_loop_filter_mmx; |
2731 c->h263_h_loop_filter= h263_h_loop_filter_mmx; | 2731 c->h263_h_loop_filter= h263_h_loop_filter_mmx; |
2732 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; | 2732 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; |
2733 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; | 2733 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; |
2734 | 2734 |
2735 if (mm_flags & MM_MMXEXT) { | 2735 if (mm_flags & MM_MMXEXT) { |
2736 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 2736 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
2737 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 2737 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
2738 | 2738 |
2739 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; | 2739 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
2943 | 2943 |
2944 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; | 2944 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; |
2945 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; | 2945 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; |
2946 } | 2946 } |
2947 } | 2947 } |
2948 | 2948 |
2949 #ifdef CONFIG_ENCODERS | 2949 #ifdef CONFIG_ENCODERS |
2950 dsputil_init_pix_mmx(c, avctx); | 2950 dsputil_init_pix_mmx(c, avctx); |
2951 #endif //CONFIG_ENCODERS | 2951 #endif //CONFIG_ENCODERS |
2952 #if 0 | 2952 #if 0 |
2953 // for speed testing | 2953 // for speed testing |