Mercurial > mplayer.hg
annotate postproc/swscale.c @ 3126:e71ae0213431
runtime cpu detection
author | michael |
---|---|
date | Mon, 26 Nov 2001 00:31:43 +0000 |
parents | 7847d6b7ad3d |
children | b196b915fdc4 |
rev | line source |
---|---|
2216 | 1 |
2 // Software scaling and colorspace conversion routines for MPlayer | |
3 | |
2269 | 4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu> |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
6 // the parts written by michael are under GNU GPL |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
7 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
8 #include <inttypes.h> |
2476 | 9 #include <string.h> |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
10 #include "../config.h" |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
11 #include "swscale.h" |
2520 | 12 #include "../mmx_defs.h" |
3126 | 13 #include "../cpudetect.h" |
2540 | 14 #undef MOVNTQ |
2680 | 15 #undef PAVGB |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
16 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
17 //#undef HAVE_MMX2 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
18 //#undef HAVE_MMX |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
19 //#undef ARCH_X86 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
20 #define DITHER1XBPP |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
21 int fullUVIpol=0; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
22 //disables the unscaled height version |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
23 int allwaysIpol=0; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
24 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
25 #define RET 0xC3 //near return opcode |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
26 /* |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
27 NOTES |
2216 | 28 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
29 known BUGS with known cause (no bugreports please!, but patches are welcome :) ) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
30 horizontal MMX2 scaler reads 1-7 samples too much (might cause a sig11) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
31 |
2326 | 32 Supported output formats BGR15 BGR16 BGR24 BGR32 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
33 BGR15 & BGR16 MMX versions support dithering |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
34 Special versions: fast Y 1:1 scaling (no interpolation in y direction) |
2216 | 35 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
36 TODO |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
37 more intelligent misalignment avoidance for the horizontal scaler |
2566 | 38 bicubic scaler |
2585 | 39 dither in C |
40 change the distance of the u & v buffer | |
3126 | 41 how to differentiate between x86 and C at runtime ?! (using C for now)
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
42 */ |
2216 | 43 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
44 #define ABS(a) ((a) > 0 ? (a) : (-(a))) |
2469 | 45 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
46 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
47 |
3126 | 48 #ifdef ARCH_X86 |
49 #define CAN_COMPILE_X86_ASM | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
50 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
51 |
3126 | 52 #ifdef CAN_COMPILE_X86_ASM |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
53 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL; |
2503 | 54 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL; |
55 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL; | |
56 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL; | |
57 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL; | |
2669 | 58 static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL; |
59 static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
60 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
61 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
62 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
63 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
64 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
65 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
66 |
2750
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
67 static volatile uint64_t __attribute__((aligned(8))) b5Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
68 static volatile uint64_t __attribute__((aligned(8))) g5Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
69 static volatile uint64_t __attribute__((aligned(8))) g6Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
70 static volatile uint64_t __attribute__((aligned(8))) r5Dither; |
2748 | 71 |
72 static uint64_t __attribute__((aligned(8))) dither4[2]={ | |
73 0x0103010301030103LL, | |
74 0x0200020002000200LL,}; | |
75 | |
76 static uint64_t __attribute__((aligned(8))) dither8[2]={ | |
77 0x0602060206020602LL, | |
78 0x0004000400040004LL,}; | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
79 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
80 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
81 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
82 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
83 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
84 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
85 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
86 |
2730 | 87 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; |
88 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | |
89 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | |
90 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
91 static uint64_t __attribute__((aligned(8))) temp0; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
92 static uint64_t __attribute__((aligned(8))) asm_yalpha1; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
93 static uint64_t __attribute__((aligned(8))) asm_uvalpha1; |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
94 |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
95 // temporary storage for 4 yuv lines: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
96 // 16bit for now (mmx likes it more compact) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
97 static uint16_t __attribute__((aligned(8))) pix_buf_y[4][2048]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
98 static uint16_t __attribute__((aligned(8))) pix_buf_uv[2][2048*2]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
99 #else |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
100 static uint16_t pix_buf_y[4][2048]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
101 static uint16_t pix_buf_uv[2][2048*2]; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
102 #endif |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
103 |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
104 // clipping helper table for C implementations: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
105 static unsigned char clip_table[768]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
106 |
2584 | 107 static unsigned short clip_table16b[768]; |
108 static unsigned short clip_table16g[768]; | |
109 static unsigned short clip_table16r[768]; | |
110 static unsigned short clip_table15b[768]; | |
111 static unsigned short clip_table15g[768]; | |
112 static unsigned short clip_table15r[768]; | |
113 | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
114 // yuv->rgb conversion tables: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
115 static int yuvtab_2568[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
116 static int yuvtab_3343[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
117 static int yuvtab_0c92[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
118 static int yuvtab_1a1e[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
119 static int yuvtab_40cf[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
120 |
3126 | 121 #ifdef CAN_COMPILE_X86_ASM |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
122 static uint8_t funnyYCode[10000]; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
123 static uint8_t funnyUVCode[10000]; |
2671 | 124 #endif |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
125 |
2469 | 126 static int canMMX2BeUsed=0; |
127 | |
3126 | 128 #ifdef CAN_COMPILE_X86_ASM |
2671 | 129 void in_asm_used_var_warning_killer() |
130 { | |
131 int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ | |
2748 | 132 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ |
2730 | 133 M24A+M24B+M24C; |
2671 | 134 if(i) i=0; |
135 } | |
136 #endif | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
137 |
3126 | 138 //Note: we have C, X86, MMX, MMX2, 3DNOW versions; there's no 3DNOW+MMX2 one
139 //Plain C versions | |
140 #undef HAVE_MMX | |
141 #undef HAVE_MMX2 | |
142 #undef HAVE_3DNOW | |
143 #undef ARCH_X86 | |
144 #define RENAME(a) a ## _C | |
145 #include "swscale_template.c" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
146 |
3126 | 147 #ifdef CAN_COMPILE_X86_ASM |
2576 | 148 |
3126 | 149 //X86 versions |
150 /* | |
151 #undef RENAME | |
152 #undef HAVE_MMX | |
153 #undef HAVE_MMX2 | |
154 #undef HAVE_3DNOW | |
155 #define ARCH_X86 | |
156 #define RENAME(a) a ## _X86 | |
157 #include "swscale_template.c" | |
158 */ | |
159 //MMX versions | |
160 #undef RENAME | |
161 #define HAVE_MMX | |
162 #undef HAVE_MMX2 | |
163 #undef HAVE_3DNOW | |
164 #define ARCH_X86 | |
165 #define RENAME(a) a ## _MMX | |
166 #include "swscale_template.c" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
167 |
3126 | 168 //MMX2 versions |
169 #undef RENAME | |
170 #define HAVE_MMX | |
171 #define HAVE_MMX2 | |
172 #undef HAVE_3DNOW | |
173 #define ARCH_X86 | |
174 #define RENAME(a) a ## _MMX2 | |
175 #include "swscale_template.c" | |
2469 | 176 |
3126 | 177 //3DNOW versions |
178 #undef RENAME | |
179 #define HAVE_MMX | |
180 #undef HAVE_MMX2 | |
181 #define HAVE_3DNOW | |
182 #define ARCH_X86 | |
183 #define RENAME(a) a ## _3DNow | |
184 #include "swscale_template.c" | |
2469 | 185 |
3126 | 186 #endif //CAN_COMPILE_X86_ASM |
2469 | 187 |
3126 | 188 // minor note: the HAVE_xyz is messed up after that line so don't use it
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
189 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
190 |
2519 | 191 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: |
2216 | 192 // *** Note: it's called multiple times while decoding a frame, first time y==0 |
193 // *** Designed to upscale, but may work for downscale too. | |
2274 | 194 // s_xinc = (src_width << 16) / dst_width |
2216 | 195 // s_yinc = (src_height << 16) / dst_height |
3126 | 196 // switching the cpu type during a sliced drawing can have bad effects, like sig11 |
// Public entry point for processing one YV12 slice.
// All arguments are forwarded unchanged to exactly one implementation:
// when x86 asm was compiled in, the first of MMX2, 3DNow, MMX that gCpuCaps
// reports as present; otherwise (or if none is present) the plain C version.
void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int y, int h,
			     uint8_t* dstptr[], int dststride, int dstw, int dstbpp,
			     unsigned int s_xinc,unsigned int s_yinc){

#ifdef CAN_COMPILE_X86_ASM
	// probed in order of speed, fastest first; the first hit handles the slice
	if(gCpuCaps.hasMMX2){
		SwScale_YV12slice_MMX2(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc);
		return;
	}
	if(gCpuCaps.has3DNow){
		SwScale_YV12slice_3DNow(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc);
		return;
	}
	if(gCpuCaps.hasMMX){
		SwScale_YV12slice_MMX(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc);
		return;
	}
#endif

	// fallback: no x86 asm compiled in, or no matching CPU extension
	SwScale_YV12slice_C(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc);
}
220 | |
221 void SwScale_Init(){ | |
222 // generating tables: | |
223 int i; | |
224 for(i=0;i<256;i++){ | |
225 clip_table[i]=0; | |
226 clip_table[i+256]=i; | |
227 clip_table[i+512]=255; | |
228 yuvtab_2568[i]=(0x2568*(i-16))+(256<<13); | |
229 yuvtab_3343[i]=0x3343*(i-128); | |
230 yuvtab_0c92[i]=-0x0c92*(i-128); | |
231 yuvtab_1a1e[i]=-0x1a1e*(i-128); | |
232 yuvtab_40cf[i]=0x40cf*(i-128); | |
233 } | |
234 | |
2584 | 235 for(i=0; i<768; i++) |
236 { | |
237 int v= clip_table[i]; | |
238 clip_table16b[i]= v>>3; | |
239 clip_table16g[i]= (v<<3)&0x07E0; | |
240 clip_table16r[i]= (v<<8)&0xF800; | |
241 clip_table15b[i]= v>>3; | |
242 clip_table15g[i]= (v<<2)&0x03E0; | |
243 clip_table15r[i]= (v<<7)&0x7C00; | |
244 } | |
3126 | 245 } |
2584 | 246 |