comparison i386/dsputil_mmx.c @ 415:1c3f42442fba libavcodec

* added a simple test main - see the comments for how to compile it - should probably be turned into a regression test
author kabi
date Thu, 23 May 2002 10:09:33 +0000
parents 92d143c2d5a8
children 040d86058c4a
comparison
equal deleted inserted replaced
414:f56e4d08e082 415:1c3f42442fba
926 926
927 #if 0 927 #if 0
928 static void just_return() { return; } 928 static void just_return() { return; }
929 #endif 929 #endif
930 930
931 #ifndef TESTCPU_MAIN
931 void dsputil_init_mmx(void) 932 void dsputil_init_mmx(void)
932 { 933 {
933 mm_flags = mm_support(); 934 mm_flags = mm_support();
934 #if 1 935 #if 1
935 printf("libavcodec: CPU flags:"); 936 printf("libavcodec: CPU flags:");
950 get_pixels = get_pixels_mmx; 951 get_pixels = get_pixels_mmx;
951 diff_pixels = diff_pixels_mmx; 952 diff_pixels = diff_pixels_mmx;
952 put_pixels_clamped = put_pixels_clamped_mmx; 953 put_pixels_clamped = put_pixels_clamped_mmx;
953 add_pixels_clamped = add_pixels_clamped_mmx; 954 add_pixels_clamped = add_pixels_clamped_mmx;
954 clear_blocks= clear_blocks_mmx; 955 clear_blocks= clear_blocks_mmx;
955 956
956 pix_abs16x16 = pix_abs16x16_mmx; 957 pix_abs16x16 = pix_abs16x16_mmx;
957 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; 958 pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
958 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; 959 pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
959 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; 960 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
960 pix_abs8x8 = pix_abs8x8_mmx; 961 pix_abs8x8 = pix_abs8x8_mmx;
970 971
971 put_no_rnd_pixels_tab[0] = put_pixels_mmx; 972 put_no_rnd_pixels_tab[0] = put_pixels_mmx;
972 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; 973 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx;
973 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; 974 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
974 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; 975 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx;
975 976
976 avg_pixels_tab[0] = avg_pixels_mmx; 977 avg_pixels_tab[0] = avg_pixels_mmx;
977 avg_pixels_tab[1] = avg_pixels_x2_mmx; 978 avg_pixels_tab[1] = avg_pixels_x2_mmx;
978 avg_pixels_tab[2] = avg_pixels_y2_mmx; 979 avg_pixels_tab[2] = avg_pixels_y2_mmx;
979 avg_pixels_tab[3] = avg_pixels_xy2_mmx; 980 avg_pixels_tab[3] = avg_pixels_xy2_mmx;
980 981
986 if (mm_flags & MM_MMXEXT) { 987 if (mm_flags & MM_MMXEXT) {
987 pix_abs16x16 = pix_abs16x16_mmx2; 988 pix_abs16x16 = pix_abs16x16_mmx2;
988 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; 989 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
989 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; 990 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
990 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; 991 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
991 992
992 pix_abs8x8 = pix_abs8x8_mmx2; 993 pix_abs8x8 = pix_abs8x8_mmx2;
993 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; 994 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
994 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; 995 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
995 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; 996 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
996 997
997 put_pixels_tab[1] = put_pixels_x2_mmx2; 998 put_pixels_tab[1] = put_pixels_x2_mmx2;
998 put_pixels_tab[2] = put_pixels_y2_mmx2; 999 put_pixels_tab[2] = put_pixels_y2_mmx2;
999 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2; 1000 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2;
1000 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2; 1001 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2;
1001 1002
1002 avg_pixels_tab[0] = avg_pixels_mmx2; 1003 avg_pixels_tab[0] = avg_pixels_mmx2;
1003 avg_pixels_tab[1] = avg_pixels_x2_mmx2; 1004 avg_pixels_tab[1] = avg_pixels_x2_mmx2;
1004 avg_pixels_tab[2] = avg_pixels_y2_mmx2; 1005 avg_pixels_tab[2] = avg_pixels_y2_mmx2;
1005 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; 1006 avg_pixels_tab[3] = avg_pixels_xy2_mmx2;
1006 } else if (mm_flags & MM_3DNOW) { 1007 } else if (mm_flags & MM_3DNOW) {
1078 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; 1079 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx;
1079 avg_pixels_tab[3] = avg_pixels_xy2_mmx; 1080 avg_pixels_tab[3] = avg_pixels_xy2_mmx;
1080 } 1081 }
1081 } 1082 }
1082 } 1083 }
1084
1085 #else // TESTCPU_MAIN
1086 /*
1087 * for testing the speed of various routines - should probably be extended
1088 * into a general-purpose regression test later
1089 *
1090 * for now use it this way:
1091 *
1092 * gcc -O4 -fomit-frame-pointer -DHAVE_AV_CONFIG_H -DTESTCPU_MAIN -I../.. -o test dsputil_mmx.c
1093 *
1094 * in the libavcodec/i386 directory - then run ./test
1095 */
1096 static inline long long rdtsc()
1097 {
1098 long long l;
1099 asm volatile( "rdtsc\n\t"
1100 : "=A" (l)
1101 );
1102 return l;
1103 }
1104
1105 int main(int argc, char* argv[])
1106 {
1107 volatile int v;
1108 int i;
1109 const int linesize = 720;
1110 char bu[32768];
1111 uint64_t te, ts = rdtsc();
1112 char* im = bu;
1113 op_pixels_func fc = put_pixels_y2_mmx2;
1114 for(i=0; i<1000000; i++){
1115 fc(im, im + 1000, linesize, 16);
1116 im += 16; //
1117 if (im > bu + 10000)
1118 im = bu;
1119 }
1120 te = rdtsc();
1121 printf("CPU Ticks: %7d\n", (int)(te - ts));
1122 fflush(stdout);
1123 }
1124 #endif