Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 294:944632089814 libavcodec
4MV motion estimation (not finished yet)
SAD functions rewritten (8x8 support & MMX2 optimizations)
HQ inter/intra decision
msmpeg4 encoding bugfix (MVs were too long)
author | michaelni |
---|---|
date | Wed, 27 Mar 2002 21:25:22 +0000 |
parents | 6f48cacd9ed9 |
children | c1a8a1b4a24b |
comparison
equal
deleted
inserted
replaced
293:6eaf5da091fa | 294:944632089814 |
---|---|
22 #include "../dsputil.h" | 22 #include "../dsputil.h" |
23 #include "../simple_idct.h" | 23 #include "../simple_idct.h" |
24 | 24 |
25 int mm_flags; /* multimedia extension flags */ | 25 int mm_flags; /* multimedia extension flags */ |
26 | 26 |
27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 27 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
28 int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 28 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
29 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 29 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
30 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 30 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); |
31 int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); | 31 |
32 int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
33 int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
34 int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
35 int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
36 | |
37 int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
38 int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
39 int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
40 int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); | |
41 | |
42 int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
43 int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
44 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
45 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); | |
46 | |
32 | 47 |
33 /* external functions, from idct_mmx.c */ | 48 /* external functions, from idct_mmx.c */ |
34 void ff_mmx_idct(DCTELEM *block); | 49 void ff_mmx_idct(DCTELEM *block); |
35 void ff_mmxext_idct(DCTELEM *block); | 50 void ff_mmxext_idct(DCTELEM *block); |
36 | 51 |
37 /* pixel operations */ | 52 /* pixel operations */ |
38 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; | 53 static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL; |
39 static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; | 54 static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL; |
40 //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; | 55 //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; |
41 //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; | 56 //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; |
42 | 57 |
43 #define JUMPALIGN() __asm __volatile (".balign 8"::) | 58 #define JUMPALIGN() __asm __volatile (".balign 8"::) |
44 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) | 59 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) |
1033 if (mm_flags & MM_MMX) { | 1048 if (mm_flags & MM_MMX) { |
1034 get_pixels = get_pixels_mmx; | 1049 get_pixels = get_pixels_mmx; |
1035 put_pixels_clamped = put_pixels_clamped_mmx; | 1050 put_pixels_clamped = put_pixels_clamped_mmx; |
1036 add_pixels_clamped = add_pixels_clamped_mmx; | 1051 add_pixels_clamped = add_pixels_clamped_mmx; |
1037 | 1052 |
1038 pix_abs16x16 = pix_abs16x16_mmx; | 1053 pix_abs16x16 = pix_abs16x16_mmx; |
1039 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; | 1054 pix_abs16x16_x2 = pix_abs16x16_x2_mmx; |
1040 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; | 1055 pix_abs16x16_y2 = pix_abs16x16_y2_mmx; |
1041 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; | 1056 pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; |
1057 pix_abs8x8 = pix_abs8x8_mmx; | |
1058 pix_abs8x8_x2 = pix_abs8x8_x2_mmx; | |
1059 pix_abs8x8_y2 = pix_abs8x8_y2_mmx; | |
1060 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; | |
1042 av_fdct = fdct_mmx; | 1061 av_fdct = fdct_mmx; |
1043 | 1062 |
1044 put_pixels_tab[0] = put_pixels_mmx; | 1063 put_pixels_tab[0] = put_pixels_mmx; |
1045 put_pixels_tab[1] = put_pixels_x2_mmx; | 1064 put_pixels_tab[1] = put_pixels_x2_mmx; |
1046 put_pixels_tab[2] = put_pixels_y2_mmx; | 1065 put_pixels_tab[2] = put_pixels_y2_mmx; |
1065 sub_pixels_tab[1] = sub_pixels_x2_mmx; | 1084 sub_pixels_tab[1] = sub_pixels_x2_mmx; |
1066 sub_pixels_tab[2] = sub_pixels_y2_mmx; | 1085 sub_pixels_tab[2] = sub_pixels_y2_mmx; |
1067 sub_pixels_tab[3] = sub_pixels_xy2_mmx; | 1086 sub_pixels_tab[3] = sub_pixels_xy2_mmx; |
1068 | 1087 |
1069 if (mm_flags & MM_MMXEXT) { | 1088 if (mm_flags & MM_MMXEXT) { |
1070 pix_abs16x16 = pix_abs16x16_sse; | 1089 pix_abs16x16 = pix_abs16x16_mmx2; |
1071 } | 1090 pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; |
1072 | 1091 pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; |
1073 if (mm_flags & MM_SSE) { | 1092 pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; |
1093 | |
1094 pix_abs8x8 = pix_abs8x8_mmx2; | |
1095 pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; | |
1096 pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; | |
1097 pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; | |
1098 | |
1074 put_pixels_tab[1] = put_pixels_x2_sse; | 1099 put_pixels_tab[1] = put_pixels_x2_sse; |
1075 put_pixels_tab[2] = put_pixels_y2_sse; | 1100 put_pixels_tab[2] = put_pixels_y2_sse; |
1076 | 1101 |
1077 avg_pixels_tab[0] = avg_pixels_sse; | 1102 avg_pixels_tab[0] = avg_pixels_sse; |
1078 avg_pixels_tab[1] = avg_pixels_x2_sse; | 1103 avg_pixels_tab[1] = avg_pixels_x2_sse; |