Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 415:1c3f42442fba libavcodec
* Added a simple test main() - see the comments for how to
compile it - this should probably be turned into a regression test
author | kabi |
---|---|
date | Thu, 23 May 2002 10:09:33 +0000 |
parents | 92d143c2d5a8 |
children | 040d86058c4a |
comparison
equal
deleted
inserted
replaced
414:f56e4d08e082 | 415:1c3f42442fba |
---|---|
926 | 926 |
927 #if 0 | 927 #if 0 |
928 static void just_return() { return; } | 928 static void just_return() { return; } |
929 #endif | 929 #endif |
930 | 930 |
931 #ifndef TESTCPU_MAIN | |
931 void dsputil_init_mmx(void) | 932 void dsputil_init_mmx(void) |
932 { | 933 { |
933 mm_flags = mm_support(); | 934 mm_flags = mm_support(); |
934 #if 1 | 935 #if 1 |
935 printf("libavcodec: CPU flags:"); | 936 printf("libavcodec: CPU flags:"); |
950 get_pixels = get_pixels_mmx; | 951 get_pixels = get_pixels_mmx; |
951 diff_pixels = diff_pixels_mmx; | 952 diff_pixels = diff_pixels_mmx; |
952 put_pixels_clamped = put_pixels_clamped_mmx; | 953 put_pixels_clamped = put_pixels_clamped_mmx; |
953 add_pixels_clamped = add_pixels_clamped_mmx; | 954 add_pixels_clamped = add_pixels_clamped_mmx; |
954 clear_blocks= clear_blocks_mmx; | 955 clear_blocks= clear_blocks_mmx; |
955 | 956 |
956 pix_abs16x16 = pix_abs16x16_mmx; | 957 pix_abs16x16 = pix_abs16x16_mmx; |
957 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | 958 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
958 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | 959 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
959 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | 960 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
960 pix_abs8x8 = pix_abs8x8_mmx; | 961 pix_abs8x8 = pix_abs8x8_mmx; |
970 | 971 |
971 put_no_rnd_pixels_tab[0] = put_pixels_mmx; | 972 put_no_rnd_pixels_tab[0] = put_pixels_mmx; |
972 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; | 973 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; |
973 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | 974 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; |
974 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; | 975 put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; |
975 | 976 |
976 avg_pixels_tab[0] = avg_pixels_mmx; | 977 avg_pixels_tab[0] = avg_pixels_mmx; |
977 avg_pixels_tab[1] = avg_pixels_x2_mmx; | 978 avg_pixels_tab[1] = avg_pixels_x2_mmx; |
978 avg_pixels_tab[2] = avg_pixels_y2_mmx; | 979 avg_pixels_tab[2] = avg_pixels_y2_mmx; |
979 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | 980 avg_pixels_tab[3] = avg_pixels_xy2_mmx; |
980 | 981 |
986 if (mm_flags & MM_MMXEXT) { | 987 if (mm_flags & MM_MMXEXT) { |
987 pix_abs16x16 = pix_abs16x16_mmx2; | 988 pix_abs16x16 = pix_abs16x16_mmx2; |
988 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; | 989 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
989 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; | 990 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
990 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; | 991 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; |
991 | 992 |
992 pix_abs8x8 = pix_abs8x8_mmx2; | 993 pix_abs8x8 = pix_abs8x8_mmx2; |
993 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | 994 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; |
994 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | 995 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; |
995 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | 996 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; |
996 | 997 |
997 put_pixels_tab[1] = put_pixels_x2_mmx2; | 998 put_pixels_tab[1] = put_pixels_x2_mmx2; |
998 put_pixels_tab[2] = put_pixels_y2_mmx2; | 999 put_pixels_tab[2] = put_pixels_y2_mmx2; |
999 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2; | 1000 put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx2; |
1000 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2; | 1001 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx2; |
1001 | 1002 |
1002 avg_pixels_tab[0] = avg_pixels_mmx2; | 1003 avg_pixels_tab[0] = avg_pixels_mmx2; |
1003 avg_pixels_tab[1] = avg_pixels_x2_mmx2; | 1004 avg_pixels_tab[1] = avg_pixels_x2_mmx2; |
1004 avg_pixels_tab[2] = avg_pixels_y2_mmx2; | 1005 avg_pixels_tab[2] = avg_pixels_y2_mmx2; |
1005 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; | 1006 avg_pixels_tab[3] = avg_pixels_xy2_mmx2; |
1006 } else if (mm_flags & MM_3DNOW) { | 1007 } else if (mm_flags & MM_3DNOW) { |
1078 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; | 1079 put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; |
1079 avg_pixels_tab[3] = avg_pixels_xy2_mmx; | 1080 avg_pixels_tab[3] = avg_pixels_xy2_mmx; |
1080 } | 1081 } |
1081 } | 1082 } |
1082 } | 1083 } |
1084 | |
1085 #else // TESTCPU_MAIN | |
1086 /* | |
1087 * for testing the speed of various routines - should probably be extended | |
1088 * into a general-purpose regression test later | |
1089 * | |
1090 * for now use it this way: | |
1091 * | |
1092 * gcc -O4 -fomit-frame-pointer -DHAVE_AV_CONFIG_H -DTESTCPU_MAIN -I../.. -o test dsputil_mmx.c | |
1093 * | |
1094 * in libavcodec/i386 directory - then run ./test | |
1095 */ | |
1096 static inline long long rdtsc() | |
1097 { | |
1098 long long l; | |
1099 asm volatile( "rdtsc\n\t" | |
1100 : "=A" (l) | |
1101 ); | |
1102 return l; | |
1103 } | |
1104 | |
1105 int main(int argc, char* argv[]) | |
1106 { | |
1107 volatile int v; | |
1108 int i; | |
1109 const int linesize = 720; | |
1110 char bu[32768]; | |
1111 uint64_t te, ts = rdtsc(); | |
1112 char* im = bu; | |
1113 op_pixels_func fc = put_pixels_y2_mmx2; | |
1114 for(i=0; i<1000000; i++){ | |
1115 fc(im, im + 1000, linesize, 16); | |
1116 im += 16; // | |
1117 if (im > bu + 10000) | |
1118 im = bu; | |
1119 } | |
1120 te = rdtsc(); | |
1121 printf("CPU Ticks: %7d\n", (int)(te - ts)); | |
1122 fflush(stdout); | |
1123 } | |
1124 #endif |