Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 402:92d143c2d5a8 libavcodec
removed unused code
field | value |
---|---|
author | glantau |
date | Mon, 20 May 2002 16:25:09 +0000 |
parents | bf164fce2c14 |
children | 1c3f42442fba |
comparison
legend: equal | deleted | inserted | replaced
401:e20655449d4a | 402:92d143c2d5a8 |
---|---|
905 pix += line_size; | 905 pix += line_size; |
906 p += line_size; | 906 p += line_size; |
907 } while(--h); | 907 } while(--h); |
908 } | 908 } |
909 | 909 |
910 static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
911 { | |
912 DCTELEM *p; | |
913 const UINT8 *pix; | |
914 p = block; | |
915 pix = pixels; | |
916 MOVQ_ZERO(mm7); | |
917 do { | |
918 __asm __volatile( | |
919 "movq %0, %%mm0\n\t" | |
920 "movq %1, %%mm2\n\t" | |
921 "movq 8%0, %%mm1\n\t" | |
922 "movq %%mm2, %%mm3\n\t" | |
923 "punpcklbw %%mm7, %%mm2\n\t" | |
924 "punpckhbw %%mm7, %%mm3\n\t" | |
925 "psubsw %%mm2, %%mm0\n\t" | |
926 "psubsw %%mm3, %%mm1\n\t" | |
927 "movq %%mm0, %0\n\t" | |
928 "movq %%mm1, 8%0\n\t" | |
929 :"+m"(*p) | |
930 :"m"(*pix) | |
931 :"memory"); | |
932 pix += line_size; | |
933 p += 8; | |
934 } while (--h); | |
935 } | |
936 | |
937 static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
938 { | |
939 DCTELEM *p; | |
940 const UINT8 *pix; | |
941 p = block; | |
942 pix = pixels; | |
943 MOVQ_ZERO(mm7); | |
944 MOVQ_WONE(mm6); | |
945 JUMPALIGN(); | |
946 do { | |
947 __asm __volatile( | |
948 "movq %0, %%mm0\n\t" | |
949 "movq %1, %%mm2\n\t" | |
950 "movq 8%0, %%mm1\n\t" | |
951 "movq 1%1, %%mm4\n\t" | |
952 "movq %%mm2, %%mm3\n\t" | |
953 "movq %%mm4, %%mm5\n\t" | |
954 "punpcklbw %%mm7, %%mm2\n\t" | |
955 "punpckhbw %%mm7, %%mm3\n\t" | |
956 "punpcklbw %%mm7, %%mm4\n\t" | |
957 "punpckhbw %%mm7, %%mm5\n\t" | |
958 "paddusw %%mm4, %%mm2\n\t" | |
959 "paddusw %%mm5, %%mm3\n\t" | |
960 "paddusw %%mm6, %%mm2\n\t" | |
961 "paddusw %%mm6, %%mm3\n\t" | |
962 "psrlw $1, %%mm2\n\t" | |
963 "psrlw $1, %%mm3\n\t" | |
964 "psubsw %%mm2, %%mm0\n\t" | |
965 "psubsw %%mm3, %%mm1\n\t" | |
966 "movq %%mm0, %0\n\t" | |
967 "movq %%mm1, 8%0\n\t" | |
968 :"+m"(*p) | |
969 :"m"(*pix) | |
970 :"memory"); | |
971 pix += line_size; | |
972 p += 8; | |
973 } while (--h); | |
974 } | |
975 | |
976 static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
977 { | |
978 DCTELEM *p; | |
979 const UINT8 *pix; | |
980 p = block; | |
981 pix = pixels; | |
982 MOVQ_ZERO(mm7); | |
983 MOVQ_WONE(mm6); | |
984 do { | |
985 __asm __volatile( | |
986 "movq %0, %%mm0\n\t" | |
987 "movq %1, %%mm2\n\t" | |
988 "movq 8%0, %%mm1\n\t" | |
989 "movq %2, %%mm4\n\t" | |
990 "movq %%mm2, %%mm3\n\t" | |
991 "movq %%mm4, %%mm5\n\t" | |
992 "punpcklbw %%mm7, %%mm2\n\t" | |
993 "punpckhbw %%mm7, %%mm3\n\t" | |
994 "punpcklbw %%mm7, %%mm4\n\t" | |
995 "punpckhbw %%mm7, %%mm5\n\t" | |
996 "paddusw %%mm4, %%mm2\n\t" | |
997 "paddusw %%mm5, %%mm3\n\t" | |
998 "paddusw %%mm6, %%mm2\n\t" | |
999 "paddusw %%mm6, %%mm3\n\t" | |
1000 "psrlw $1, %%mm2\n\t" | |
1001 "psrlw $1, %%mm3\n\t" | |
1002 "psubsw %%mm2, %%mm0\n\t" | |
1003 "psubsw %%mm3, %%mm1\n\t" | |
1004 "movq %%mm0, %0\n\t" | |
1005 "movq %%mm1, 8%0\n\t" | |
1006 :"+m"(*p) | |
1007 :"m"(*pix), "m"(*(pix+line_size)) | |
1008 :"memory"); | |
1009 pix += line_size; | |
1010 p += 8; | |
1011 } while (--h); | |
1012 } | |
1013 | |
1014 static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) | |
1015 { | |
1016 DCTELEM *p; | |
1017 const UINT8 *pix; | |
1018 p = block; | |
1019 pix = pixels; | |
1020 MOVQ_ZERO(mm7); | |
1021 MOVQ_WTWO(mm6); | |
1022 JUMPALIGN(); | |
1023 do { | |
1024 __asm __volatile( | |
1025 "movq %1, %%mm0\n\t" | |
1026 "movq %2, %%mm1\n\t" | |
1027 "movq 1%1, %%mm4\n\t" | |
1028 "movq 1%2, %%mm5\n\t" | |
1029 "movq %%mm0, %%mm2\n\t" | |
1030 "movq %%mm1, %%mm3\n\t" | |
1031 "punpcklbw %%mm7, %%mm0\n\t" | |
1032 "punpcklbw %%mm7, %%mm1\n\t" | |
1033 "punpckhbw %%mm7, %%mm2\n\t" | |
1034 "punpckhbw %%mm7, %%mm3\n\t" | |
1035 "paddusw %%mm1, %%mm0\n\t" | |
1036 "paddusw %%mm3, %%mm2\n\t" | |
1037 "movq %%mm4, %%mm1\n\t" | |
1038 "movq %%mm5, %%mm3\n\t" | |
1039 "punpcklbw %%mm7, %%mm4\n\t" | |
1040 "punpcklbw %%mm7, %%mm5\n\t" | |
1041 "punpckhbw %%mm7, %%mm1\n\t" | |
1042 "punpckhbw %%mm7, %%mm3\n\t" | |
1043 "paddusw %%mm5, %%mm4\n\t" | |
1044 "paddusw %%mm3, %%mm1\n\t" | |
1045 "paddusw %%mm6, %%mm4\n\t" | |
1046 "paddusw %%mm6, %%mm1\n\t" | |
1047 "paddusw %%mm4, %%mm0\n\t" | |
1048 "paddusw %%mm1, %%mm2\n\t" | |
1049 "movq %0, %%mm1\n\t" | |
1050 "movq 8%0, %%mm3\n\t" | |
1051 "psrlw $2, %%mm0\n\t" | |
1052 "psrlw $2, %%mm2\n\t" | |
1053 "psubsw %%mm0, %%mm1\n\t" | |
1054 "psubsw %%mm2, %%mm3\n\t" | |
1055 "movq %%mm1, %0\n\t" | |
1056 "movq %%mm3, 8%0\n\t" | |
1057 :"+m"(*p) | |
1058 :"m"(*pix), | |
1059 "m"(*(pix+line_size)) | |
1060 :"memory"); | |
1061 pix += line_size; | |
1062 p += 8 ; | |
1063 } while(--h); | |
1064 } | |
1065 | |
1066 static void clear_blocks_mmx(DCTELEM *blocks) | 910 static void clear_blocks_mmx(DCTELEM *blocks) |
1067 { | 911 { |
1068 asm volatile( | 912 asm volatile( |
1069 "pxor %%mm7, %%mm7 \n\t" | 913 "pxor %%mm7, %%mm7 \n\t" |
1070 "movl $-128*6, %%eax \n\t" | 914 "movl $-128*6, %%eax \n\t" |
1137 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; | 981 avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; |
1138 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; | 982 avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; |
1139 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; | 983 avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; |
1140 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; | 984 avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; |
1141 | 985 |
1142 sub_pixels_tab[0] = sub_pixels_mmx; | |
1143 sub_pixels_tab[1] = sub_pixels_x2_mmx; | |
1144 sub_pixels_tab[2] = sub_pixels_y2_mmx; | |
1145 sub_pixels_tab[3] = sub_pixels_xy2_mmx; | |
1146 | |
1147 if (mm_flags & MM_MMXEXT) { | 986 if (mm_flags & MM_MMXEXT) { |
1148 pix_abs16x16 = pix_abs16x16_mmx2; | 987 pix_abs16x16 = pix_abs16x16_mmx2; |
1149 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | 988 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
1150 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | 989 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
1151 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | 990 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; |
1162 | 1001 |
1163 avg_pixels_tab[0] = avg_pixels_mmx2; | 1002 avg_pixels_tab[0] = avg_pixels_mmx2; |
1164 avg_pixels_tab[1] = avg_pixels_x2_mmx2; | 1003 avg_pixels_tab[1] = avg_pixels_x2_mmx2; |
1165 avg_pixels_tab[2] = avg_pixels_y2_mmx2; | 1004 avg_pixels_tab[2] = avg_pixels_y2_mmx2; |
1166 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; | 1005 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; |
1167 | |
1168 sub_pixels_tab[1] = sub_pixels_x2_mmx2; | |
1169 sub_pixels_tab[2] = sub_pixels_y2_mmx2; | |
1170 } else if (mm_flags & MM_3DNOW) { | 1006 } else if (mm_flags & MM_3DNOW) { |
1171 put_pixels_tab[1] = put_pixels_x2_3dnow; | 1007 put_pixels_tab[1] = put_pixels_x2_3dnow; |
1172 put_pixels_tab[2] = put_pixels_y2_3dnow; | 1008 put_pixels_tab[2] = put_pixels_y2_3dnow; |
1173 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow; | 1009 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_3dnow; |
1174 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow; | 1010 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_3dnow; |
1175 | 1011 |
1176 avg_pixels_tab[0] = avg_pixels_3dnow; | 1012 avg_pixels_tab[0] = avg_pixels_3dnow; |
1177 avg_pixels_tab[1] = avg_pixels_x2_3dnow; | 1013 avg_pixels_tab[1] = avg_pixels_x2_3dnow; |
1178 avg_pixels_tab[2] = avg_pixels_y2_3dnow; | 1014 avg_pixels_tab[2] = avg_pixels_y2_3dnow; |
1179 avg_pixels_tab[3] = avg_pixels_xy2_3dnow; | 1015 avg_pixels_tab[3] = avg_pixels_xy2_3dnow; |
1180 | |
1181 sub_pixels_tab[1] = sub_pixels_x2_3dnow; | |
1182 sub_pixels_tab[2] = sub_pixels_y2_3dnow; | |
1183 } | 1016 } |
1184 | 1017 |
1185 /* idct */ | 1018 /* idct */ |
1186 if (mm_flags & MM_MMXEXT) { | 1019 if (mm_flags & MM_MMXEXT) { |
1187 ff_idct = ff_mmxext_idct; | 1020 ff_idct = ff_mmxext_idct; |
1223 avg_no_rnd_pixels_tab[0] = just_return; | 1056 avg_no_rnd_pixels_tab[0] = just_return; |
1224 avg_no_rnd_pixels_tab[1] = just_return; | 1057 avg_no_rnd_pixels_tab[1] = just_return; |
1225 avg_no_rnd_pixels_tab[2] = just_return; | 1058 avg_no_rnd_pixels_tab[2] = just_return; |
1226 avg_no_rnd_pixels_tab[3] = just_return; | 1059 avg_no_rnd_pixels_tab[3] = just_return; |
1227 | 1060 |
1228 sub_pixels_tab[0] = just_return; | |
1229 sub_pixels_tab[1] = just_return; | |
1230 sub_pixels_tab[2] = just_return; | |
1231 sub_pixels_tab[3] = just_return; | |
1232 | |
1233 //av_fdct = just_return; | 1061 //av_fdct = just_return; |
1234 //ff_idct = just_return; | 1062 //ff_idct = just_return; |
1235 #endif | 1063 #endif |
1236 } | 1064 } |
1065 | |
1066 /* remove any non bit exact operation (testing purpose). NOTE that | |
1067 this function should be kept as small as possible because it is | |
1068 always difficult to test automatically non bit exact cases. */ | |
1069 void dsputil_set_bit_exact_mmx(void) | |
1070 { | |
1071 if (mm_flags & MM_MMX) { | |
1072 if (mm_flags & MM_MMXEXT) { | |
1073 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
1074 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
1075 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
1076 } else if (mm_flags & MM_3DNOW) { | |
1077 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | |
1078 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | |
1079 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | |
1080 } | |
1081 } | |
1082 } |