comparison i386/dsputil_mmx.c @ 294:944632089814 libavcodec

4MV motion estimation (not finished yet); SAD functions rewritten (8x8 support & MMX2 optimizations); HQ inter/intra decision; msmpeg4 encoding bugfix (MVs were too long)
author michaelni
date Wed, 27 Mar 2002 21:25:22 +0000
parents 6f48cacd9ed9
children c1a8a1b4a24b
comparison
equal deleted inserted replaced
293:6eaf5da091fa 294:944632089814
22 #include "../dsputil.h" 22 #include "../dsputil.h"
23 #include "../simple_idct.h" 23 #include "../simple_idct.h"
24 24
25 int mm_flags; /* multimedia extension flags */ 25 int mm_flags; /* multimedia extension flags */
26 26
27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); 27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
28 int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); 28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
29 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); 29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
30 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); 30 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
31 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); 31
32 int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
33 int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
34 int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
35 int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
36
37 int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
38 int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
39 int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
40 int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
41
42 int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
43 int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
44 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
45 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
46
32 47
33 /* external functions, from idct_mmx.c */ 48 /* external functions, from idct_mmx.c */
34 void ff_mmx_idct(DCTELEM *block); 49 void ff_mmx_idct(DCTELEM *block);
35 void ff_mmxext_idct(DCTELEM *block); 50 void ff_mmxext_idct(DCTELEM *block);
36 51
37 /* pixel operations */ 52 /* pixel operations */
38 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; 53 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL;
39 static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; 54 static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL;
40 //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; 55 //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
41 //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; 56 //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
42 57
43 #define JUMPALIGN() __asm __volatile (".balign 8"::) 58 #define JUMPALIGN() __asm __volatile (".balign 8"::)
44 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) 59 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
1033 if (mm_flags & MM_MMX) { 1048 if (mm_flags & MM_MMX) {
1034 get_pixels = get_pixels_mmx; 1049 get_pixels = get_pixels_mmx;
1035 put_pixels_clamped = put_pixels_clamped_mmx; 1050 put_pixels_clamped = put_pixels_clamped_mmx;
1036 add_pixels_clamped = add_pixels_clamped_mmx; 1051 add_pixels_clamped = add_pixels_clamped_mmx;
1037 1052
1038 pix_abs16x16 = pix_abs16x16_mmx; 1053 pix_abs16x16 = pix_abs16x16_mmx;
1039 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; 1054 pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
1040 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; 1055 pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
1041 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; 1056 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
1057 pix_abs8x8 = pix_abs8x8_mmx;
1058 pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
1059 pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
1060 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
1042 av_fdct = fdct_mmx; 1061 av_fdct = fdct_mmx;
1043 1062
1044 put_pixels_tab[0] = put_pixels_mmx; 1063 put_pixels_tab[0] = put_pixels_mmx;
1045 put_pixels_tab[1] = put_pixels_x2_mmx; 1064 put_pixels_tab[1] = put_pixels_x2_mmx;
1046 put_pixels_tab[2] = put_pixels_y2_mmx; 1065 put_pixels_tab[2] = put_pixels_y2_mmx;
1065 sub_pixels_tab[1] = sub_pixels_x2_mmx; 1084 sub_pixels_tab[1] = sub_pixels_x2_mmx;
1066 sub_pixels_tab[2] = sub_pixels_y2_mmx; 1085 sub_pixels_tab[2] = sub_pixels_y2_mmx;
1067 sub_pixels_tab[3] = sub_pixels_xy2_mmx; 1086 sub_pixels_tab[3] = sub_pixels_xy2_mmx;
1068 1087
1069 if (mm_flags & MM_MMXEXT) { 1088 if (mm_flags & MM_MMXEXT) {
1070 pix_abs16x16 = pix_abs16x16_sse; 1089 pix_abs16x16 = pix_abs16x16_mmx2;
1071 } 1090 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
1072 1091 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
1073 if (mm_flags & MM_SSE) { 1092 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
1093
1094 pix_abs8x8 = pix_abs8x8_mmx2;
1095 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
1096 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
1097 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
1098
1074 put_pixels_tab[1] = put_pixels_x2_sse; 1099 put_pixels_tab[1] = put_pixels_x2_sse;
1075 put_pixels_tab[2] = put_pixels_y2_sse; 1100 put_pixels_tab[2] = put_pixels_y2_sse;
1076 1101
1077 avg_pixels_tab[0] = avg_pixels_sse; 1102 avg_pixels_tab[0] = avg_pixels_sse;
1078 avg_pixels_tab[1] = avg_pixels_x2_sse; 1103 avg_pixels_tab[1] = avg_pixels_x2_sse;