Mercurial > mplayer.hg
comparison postproc/rgb2rgb.c @ 9987:988c2ffc5bc1
remove remaining cpudetect dependancy
author | michael |
---|---|
date | Fri, 25 Apr 2003 17:16:55 +0000 |
parents | 50ef22bcc0c3 |
children | f33f908ae085 |
comparison
equal
deleted
inserted
replaced
9986:4bdd248d372e | 9987:988c2ffc5bc1 |
---|---|
8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) | 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) |
9 */ | 9 */ |
10 #include <inttypes.h> | 10 #include <inttypes.h> |
11 #include "../config.h" | 11 #include "../config.h" |
12 #include "rgb2rgb.h" | 12 #include "rgb2rgb.h" |
13 #include "../cpudetect.h" | 13 #include "swscale.h" |
14 #include "../mangle.h" | 14 #include "../mangle.h" |
15 #include "../bswap.h" | 15 #include "../bswap.h" |
16 #include "../libvo/fastmemcpy.h" | 16 #include "../libvo/fastmemcpy.h" |
17 | 17 |
18 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit | |
19 | |
20 void (*rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
21 void (*rgb24to16)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
22 void (*rgb24to15)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
23 void (*rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
24 void (*rgb32to16)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
25 void (*rgb32to15)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
26 void (*rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
27 void (*rgb15to24)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
28 void (*rgb15to32)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
29 void (*rgb16to15)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
30 void (*rgb16to24)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
31 void (*rgb16to32)(const uint8_t *src,uint8_t *dst,unsigned src_size); | |
32 //void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
33 void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
34 void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
35 void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
36 void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
37 //void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
38 void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
39 void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size); | |
40 | |
41 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
42 unsigned int width, unsigned int height, | |
43 int lumStride, int chromStride, int dstStride); | |
44 void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
45 unsigned int width, unsigned int height, | |
46 int lumStride, int chromStride, int dstStride); | |
47 void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
48 unsigned int width, unsigned int height, | |
49 int lumStride, int chromStride, int srcStride); | |
50 void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
51 unsigned int width, unsigned int height, | |
52 int lumStride, int chromStride, int srcStride); | |
53 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); | |
54 void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst, | |
55 unsigned width, unsigned height, int src1Stride, | |
56 int src2Stride, int dstStride); | |
57 void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |
58 uint8_t *dst1, uint8_t *dst2, | |
59 unsigned width, unsigned height, | |
60 int srcStride1, int srcStride2, | |
61 int dstStride1, int dstStride2); | |
62 void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, | |
63 uint8_t *dst, | |
64 unsigned width, unsigned height, | |
65 int srcStride1, int srcStride2, | |
66 int srcStride3, int dstStride); | |
67 | |
18 #ifdef ARCH_X86 | 68 #ifdef ARCH_X86 |
19 #define CAN_COMPILE_X86_ASM | |
20 #endif | |
21 | |
22 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit | |
23 | |
24 #ifdef CAN_COMPILE_X86_ASM | |
25 static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL; | 69 static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL; |
26 static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; | 70 static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; |
27 static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL; | 71 static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL; |
28 static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; | 72 static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; |
29 static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; | 73 static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; |
104 #undef ARCH_X86 | 148 #undef ARCH_X86 |
105 #undef HAVE_SSE2 | 149 #undef HAVE_SSE2 |
106 #define RENAME(a) a ## _C | 150 #define RENAME(a) a ## _C |
107 #include "rgb2rgb_template.c" | 151 #include "rgb2rgb_template.c" |
108 | 152 |
109 #ifdef CAN_COMPILE_X86_ASM | 153 #ifdef ARCH_X86 |
110 | 154 |
111 //MMX versions | 155 //MMX versions |
112 #undef RENAME | 156 #undef RENAME |
113 #define HAVE_MMX | 157 #define HAVE_MMX |
114 #undef HAVE_MMX2 | 158 #undef HAVE_MMX2 |
136 #undef HAVE_SSE2 | 180 #undef HAVE_SSE2 |
137 #define ARCH_X86 | 181 #define ARCH_X86 |
138 #define RENAME(a) a ## _3DNow | 182 #define RENAME(a) a ## _3DNow |
139 #include "rgb2rgb_template.c" | 183 #include "rgb2rgb_template.c" |
140 | 184 |
141 #endif //CAN_COMPILE_X86_ASM | 185 #endif //ARCH_X86 |
142 | |
143 void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size) | |
144 { | |
145 #ifdef CAN_COMPILE_X86_ASM | |
146 // ordered per speed fasterst first | |
147 if(gCpuCaps.hasMMX2) | |
148 rgb24to32_MMX2(src, dst, src_size); | |
149 else if(gCpuCaps.has3DNow) | |
150 rgb24to32_3DNow(src, dst, src_size); | |
151 else if(gCpuCaps.hasMMX) | |
152 rgb24to32_MMX(src, dst, src_size); | |
153 else | |
154 #endif | |
155 rgb24to32_C(src, dst, src_size); | |
156 } | |
157 | |
158 void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size) | |
159 { | |
160 #ifdef CAN_COMPILE_X86_ASM | |
161 // ordered per speed fasterst first | |
162 if(gCpuCaps.hasMMX2) | |
163 rgb15to24_MMX2(src, dst, src_size); | |
164 else if(gCpuCaps.has3DNow) | |
165 rgb15to24_3DNow(src, dst, src_size); | |
166 else if(gCpuCaps.hasMMX) | |
167 rgb15to24_MMX(src, dst, src_size); | |
168 else | |
169 #endif | |
170 rgb15to24_C(src, dst, src_size); | |
171 } | |
172 | |
173 void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size) | |
174 { | |
175 #ifdef CAN_COMPILE_X86_ASM | |
176 // ordered per speed fasterst first | |
177 if(gCpuCaps.hasMMX2) | |
178 rgb16to24_MMX2(src, dst, src_size); | |
179 else if(gCpuCaps.has3DNow) | |
180 rgb16to24_3DNow(src, dst, src_size); | |
181 else if(gCpuCaps.hasMMX) | |
182 rgb16to24_MMX(src, dst, src_size); | |
183 else | |
184 #endif | |
185 rgb16to24_C(src, dst, src_size); | |
186 } | |
187 | |
188 void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size) | |
189 { | |
190 #ifdef CAN_COMPILE_X86_ASM | |
191 // ordered per speed fasterst first | |
192 if(gCpuCaps.hasMMX2) | |
193 rgb15to32_MMX2(src, dst, src_size); | |
194 else if(gCpuCaps.has3DNow) | |
195 rgb15to32_3DNow(src, dst, src_size); | |
196 else if(gCpuCaps.hasMMX) | |
197 rgb15to32_MMX(src, dst, src_size); | |
198 else | |
199 #endif | |
200 rgb15to32_C(src, dst, src_size); | |
201 } | |
202 | |
203 void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size) | |
204 { | |
205 #ifdef CAN_COMPILE_X86_ASM | |
206 // ordered per speed fasterst first | |
207 if(gCpuCaps.hasMMX2) | |
208 rgb16to32_MMX2(src, dst, src_size); | |
209 else if(gCpuCaps.has3DNow) | |
210 rgb16to32_3DNow(src, dst, src_size); | |
211 else if(gCpuCaps.hasMMX) | |
212 rgb16to32_MMX(src, dst, src_size); | |
213 else | |
214 #endif | |
215 rgb16to32_C(src, dst, src_size); | |
216 } | |
217 | |
218 void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size) | |
219 { | |
220 #ifdef CAN_COMPILE_X86_ASM | |
221 // ordered per speed fasterst first | |
222 if(gCpuCaps.hasMMX2) | |
223 rgb32to24_MMX2(src, dst, src_size); | |
224 else if(gCpuCaps.has3DNow) | |
225 rgb32to24_3DNow(src, dst, src_size); | |
226 else if(gCpuCaps.hasMMX) | |
227 rgb32to24_MMX(src, dst, src_size); | |
228 else | |
229 #endif | |
230 rgb32to24_C(src, dst, src_size); | |
231 } | |
232 | 186 |
233 /* | 187 /* |
234 Original by Strepto/Astral | 188 rgb15->rgb16 Original by Strepto/Astral |
235 ported to gcc & bugfixed : A'rpi | 189 ported to gcc & bugfixed : A'rpi |
236 MMX2, 3DNOW optimization by Nick Kurshev | 190 MMX2, 3DNOW optimization by Nick Kurshev |
237 32bit c version, and and&add trick by Michael Niedermayer | 191 32bit c version, and and&add trick by Michael Niedermayer |
238 */ | 192 */ |
239 void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size) | 193 |
240 { | 194 void sws_rgb2rgb_init(int flags){ |
241 #ifdef CAN_COMPILE_X86_ASM | 195 #ifdef ARCH_X86 |
242 // ordered per speed fasterst first | 196 if(flags & SWS_CPU_CAPS_MMX2){ |
243 if(gCpuCaps.hasMMX2) | 197 rgb15to16= rgb15to16_MMX2; |
244 rgb15to16_MMX2(src, dst, src_size); | 198 rgb15to24= rgb15to24_MMX2; |
245 else if(gCpuCaps.has3DNow) | 199 rgb15to32= rgb15to32_MMX2; |
246 rgb15to16_3DNow(src, dst, src_size); | 200 rgb16to24= rgb16to24_MMX2; |
247 else if(gCpuCaps.hasMMX) | 201 rgb16to32= rgb16to32_MMX2; |
248 rgb15to16_MMX(src, dst, src_size); | 202 rgb16to15= rgb16to15_MMX2; |
249 else | 203 rgb24to16= rgb24to16_MMX2; |
204 rgb24to15= rgb24to15_MMX2; | |
205 rgb24to32= rgb24to32_MMX2; | |
206 rgb32to16= rgb32to16_MMX2; | |
207 rgb32to15= rgb32to15_MMX2; | |
208 rgb32to24= rgb32to24_MMX2; | |
209 rgb24tobgr15= rgb24tobgr15_MMX2; | |
210 rgb24tobgr16= rgb24tobgr16_MMX2; | |
211 rgb24tobgr24= rgb24tobgr24_MMX2; | |
212 rgb32tobgr32= rgb32tobgr32_MMX2; | |
213 rgb32tobgr16= rgb32tobgr16_MMX2; | |
214 rgb32tobgr15= rgb32tobgr15_MMX2; | |
215 yv12toyuy2= yv12toyuy2_MMX2; | |
216 yuv422ptoyuy2= yuv422ptoyuy2_MMX2; | |
217 yuy2toyv12= yuy2toyv12_MMX2; | |
218 uyvytoyv12= uyvytoyv12_MMX2; | |
219 yvu9toyv12= yvu9toyv12_MMX2; | |
220 planar2x= planar2x_MMX2; | |
221 rgb24toyv12= rgb24toyv12_MMX2; | |
222 interleaveBytes= interleaveBytes_MMX2; | |
223 vu9_to_vu12= vu9_to_vu12_MMX2; | |
224 yvu9_to_yuy2= yvu9_to_yuy2_MMX2; | |
225 }else if(flags & SWS_CPU_CAPS_3DNOW){ | |
226 rgb15to16= rgb15to16_3DNOW; | |
227 rgb15to24= rgb15to24_3DNOW; | |
228 rgb15to32= rgb15to32_3DNOW; | |
229 rgb16to24= rgb16to24_3DNOW; | |
230 rgb16to32= rgb16to32_3DNOW; | |
231 rgb16to15= rgb16to15_3DNOW; | |
232 rgb24to16= rgb24to16_3DNOW; | |
233 rgb24to15= rgb24to15_3DNOW; | |
234 rgb24to32= rgb24to32_3DNOW; | |
235 rgb32to16= rgb32to16_3DNOW; | |
236 rgb32to15= rgb32to15_3DNOW; | |
237 rgb32to24= rgb32to24_3DNOW; | |
238 rgb24tobgr15= rgb24tobgr15_3DNOW; | |
239 rgb24tobgr16= rgb24tobgr16_3DNOW; | |
240 rgb24tobgr24= rgb24tobgr24_3DNOW; | |
241 rgb32tobgr32= rgb32tobgr32_3DNOW; | |
242 rgb32tobgr16= rgb32tobgr16_3DNOW; | |
243 rgb32tobgr15= rgb32tobgr15_3DNOW; | |
244 yv12toyuy2= yv12toyuy2_3DNOW; | |
245 yuv422ptoyuy2= yuv422ptoyuy2_3DNOW; | |
246 yuy2toyv12= yuy2toyv12_3DNOW; | |
247 uyvytoyv12= uyvytoyv12_3DNOW; | |
248 yvu9toyv12= yvu9toyv12_3DNOW; | |
249 planar2x= planar2x_3DNOW; | |
250 rgb24toyv12= rgb24toyv12_3DNOW; | |
251 interleaveBytes= interleaveBytes_3DNOW; | |
252 vu9_to_vu12= vu9_to_vu12_3DNOW; | |
253 yvu9_to_yuy2= yvu9_to_yuy2_3DNOW; | |
254 }else if(flags & SWS_CPU_CAPS_MMX){ | |
255 rgb15to16= rgb15to16_MMX; | |
256 rgb15to24= rgb15to24_MMX; | |
257 rgb15to32= rgb15to32_MMX; | |
258 rgb16to24= rgb16to24_MMX; | |
259 rgb16to32= rgb16to32_MMX; | |
260 rgb16to15= rgb16to15_MMX; | |
261 rgb24to16= rgb24to16_MMX; | |
262 rgb24to15= rgb24to15_MMX; | |
263 rgb24to32= rgb24to32_MMX; | |
264 rgb32to16= rgb32to16_MMX; | |
265 rgb32to15= rgb32to15_MMX; | |
266 rgb32to24= rgb32to24_MMX; | |
267 rgb24tobgr15= rgb24tobgr15_MMX; | |
268 rgb24tobgr16= rgb24tobgr16_MMX; | |
269 rgb24tobgr24= rgb24tobgr24_MMX; | |
270 rgb32tobgr32= rgb32tobgr32_MMX; | |
271 rgb32tobgr16= rgb32tobgr16_MMX; | |
272 rgb32tobgr15= rgb32tobgr15_MMX; | |
273 yv12toyuy2= yv12toyuy2_MMX; | |
274 yuv422ptoyuy2= yuv422ptoyuy2_MMX; | |
275 yuy2toyv12= yuy2toyv12_MMX; | |
276 uyvytoyv12= uyvytoyv12_MMX; | |
277 yvu9toyv12= yvu9toyv12_MMX; | |
278 planar2x= planar2x_MMX; | |
279 rgb24toyv12= rgb24toyv12_MMX; | |
280 interleaveBytes= interleaveBytes_MMX; | |
281 vu9_to_vu12= vu9_to_vu12_MMX; | |
282 yvu9_to_yuy2= yvu9_to_yuy2_MMX; | |
283 }else | |
250 #endif | 284 #endif |
251 rgb15to16_C(src, dst, src_size); | 285 { |
252 } | 286 rgb15to16= rgb15to16_C; |
253 | 287 rgb15to24= rgb15to24_C; |
254 void rgb16to15(const uint8_t *src,uint8_t *dst,unsigned src_size) | 288 rgb15to32= rgb15to32_C; |
255 { | 289 rgb16to24= rgb16to24_C; |
256 #ifdef CAN_COMPILE_X86_ASM | 290 rgb16to32= rgb16to32_C; |
257 // ordered per speed fasterst first | 291 rgb16to15= rgb16to15_C; |
258 if(gCpuCaps.hasMMX2) | 292 rgb24to16= rgb24to16_C; |
259 rgb16to15_MMX2(src, dst, src_size); | 293 rgb24to15= rgb24to15_C; |
260 else if(gCpuCaps.has3DNow) | 294 rgb24to32= rgb24to32_C; |
261 rgb16to15_3DNow(src, dst, src_size); | 295 rgb32to16= rgb32to16_C; |
262 else if(gCpuCaps.hasMMX) | 296 rgb32to15= rgb32to15_C; |
263 rgb16to15_MMX(src, dst, src_size); | 297 rgb32to24= rgb32to24_C; |
264 else | 298 rgb24tobgr15= rgb24tobgr15_C; |
265 #endif | 299 rgb24tobgr16= rgb24tobgr16_C; |
266 rgb16to15_C(src, dst, src_size); | 300 rgb24tobgr24= rgb24tobgr24_C; |
267 } | 301 rgb32tobgr32= rgb32tobgr32_C; |
302 rgb32tobgr16= rgb32tobgr16_C; | |
303 rgb32tobgr15= rgb32tobgr15_C; | |
304 yv12toyuy2= yv12toyuy2_C; | |
305 yuv422ptoyuy2= yuv422ptoyuy2_C; | |
306 yuy2toyv12= yuy2toyv12_C; | |
307 // uyvytoyv12= uyvytoyv12_C; | |
308 // yvu9toyv12= yvu9toyv12_C; | |
309 planar2x= planar2x_C; | |
310 rgb24toyv12= rgb24toyv12_C; | |
311 interleaveBytes= interleaveBytes_C; | |
312 vu9_to_vu12= vu9_to_vu12_C; | |
313 yvu9_to_yuy2= yvu9_to_yuy2_C; | |
314 } | |
315 } | |
316 | |
268 /** | 317 /** |
269 * Pallete is assumed to contain bgr32 | 318 * Pallete is assumed to contain bgr32 |
270 */ | 319 */ |
271 void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) | 320 void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) |
272 { | 321 { |
339 dst[2]= palette[ src[i]*4+2 ]; | 388 dst[2]= palette[ src[i]*4+2 ]; |
340 dst+= 3; | 389 dst+= 3; |
341 } | 390 } |
342 } | 391 } |
343 | 392 |
344 void bgr24torgb24(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
345 { | |
346 #ifdef CAN_COMPILE_X86_ASM | |
347 // ordered per speed fasterst first | |
348 if(gCpuCaps.hasMMX2) | |
349 bgr24torgb24_MMX2(src, dst, src_size); | |
350 else if(gCpuCaps.has3DNow) | |
351 bgr24torgb24_3DNow(src, dst, src_size); | |
352 else if(gCpuCaps.hasMMX) | |
353 bgr24torgb24_MMX(src, dst, src_size); | |
354 else | |
355 bgr24torgb24_C(src, dst, src_size); | |
356 #else | |
357 bgr24torgb24_C(src, dst, src_size); | |
358 #endif | |
359 } | |
360 | |
361 void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
362 { | |
363 #ifdef CAN_COMPILE_X86_ASM | |
364 // ordered per speed fasterst first | |
365 if(gCpuCaps.hasMMX2) | |
366 rgb32to16_MMX2(src, dst, src_size); | |
367 else if(gCpuCaps.has3DNow) | |
368 rgb32to16_3DNow(src, dst, src_size); | |
369 else if(gCpuCaps.hasMMX) | |
370 rgb32to16_MMX(src, dst, src_size); | |
371 else | |
372 #endif | |
373 rgb32to16_C(src, dst, src_size); | |
374 } | |
375 | |
376 void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
377 { | |
378 #ifdef CAN_COMPILE_X86_ASM | |
379 // ordered per speed fasterst first | |
380 if(gCpuCaps.hasMMX2) | |
381 rgb32to15_MMX2(src, dst, src_size); | |
382 else if(gCpuCaps.has3DNow) | |
383 rgb32to15_3DNow(src, dst, src_size); | |
384 else if(gCpuCaps.hasMMX) | |
385 rgb32to15_MMX(src, dst, src_size); | |
386 else | |
387 #endif | |
388 rgb32to15_C(src, dst, src_size); | |
389 } | |
390 | |
391 void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
392 { | |
393 #ifdef CAN_COMPILE_X86_ASM | |
394 // ordered per speed fasterst first | |
395 if(gCpuCaps.hasMMX2) | |
396 rgb24to16_MMX2(src, dst, src_size); | |
397 else if(gCpuCaps.has3DNow) | |
398 rgb24to16_3DNow(src, dst, src_size); | |
399 else if(gCpuCaps.hasMMX) | |
400 rgb24to16_MMX(src, dst, src_size); | |
401 else | |
402 #endif | |
403 rgb24to16_C(src, dst, src_size); | |
404 } | |
405 | |
406 void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
407 { | |
408 #ifdef CAN_COMPILE_X86_ASM | |
409 // ordered per speed fasterst first | |
410 if(gCpuCaps.hasMMX2) | |
411 rgb24to15_MMX2(src, dst, src_size); | |
412 else if(gCpuCaps.has3DNow) | |
413 rgb24to15_3DNow(src, dst, src_size); | |
414 else if(gCpuCaps.hasMMX) | |
415 rgb24to15_MMX(src, dst, src_size); | |
416 else | |
417 #endif | |
418 rgb24to15_C(src, dst, src_size); | |
419 } | |
420 | |
421 /** | 393 /** |
422 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette | 394 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette |
423 */ | 395 */ |
424 void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) | 396 void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) |
425 { | 397 { |
448 unsigned i; | 420 unsigned i; |
449 for(i=0; i<num_pixels; i++) | 421 for(i=0; i<num_pixels; i++) |
450 ((uint16_t *)dst)[i] = bswap_16(((uint16_t *)palette)[ src[i] ]); | 422 ((uint16_t *)dst)[i] = bswap_16(((uint16_t *)palette)[ src[i] ]); |
451 } | 423 } |
452 | 424 |
453 void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |
454 { | |
455 #ifdef CAN_COMPILE_X86_ASM | |
456 // ordered per speed fasterst first | |
457 if(gCpuCaps.hasMMX2) | |
458 rgb32tobgr32_MMX2(src, dst, src_size); | |
459 else if(gCpuCaps.has3DNow) | |
460 rgb32tobgr32_3DNow(src, dst, src_size); | |
461 else if(gCpuCaps.hasMMX) | |
462 rgb32tobgr32_MMX(src, dst, src_size); | |
463 else | |
464 #endif | |
465 rgb32tobgr32_C(src, dst, src_size); | |
466 } | |
467 | |
468 void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) | 425 void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
469 { | 426 { |
470 unsigned i; | 427 unsigned i; |
471 unsigned num_pixels = src_size >> 2; | 428 unsigned num_pixels = src_size >> 2; |
472 for(i=0; i<num_pixels; i++) | 429 for(i=0; i<num_pixels; i++) |
473 { | 430 { |
474 dst[3*i + 0] = src[4*i + 2]; | 431 dst[3*i + 0] = src[4*i + 2]; |
475 dst[3*i + 1] = src[4*i + 1]; | 432 dst[3*i + 1] = src[4*i + 1]; |
476 dst[3*i + 2] = src[4*i + 0]; | 433 dst[3*i + 2] = src[4*i + 0]; |
477 } | 434 } |
478 } | |
479 | |
480 void rgb32tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |
481 { | |
482 #ifdef CAN_COMPILE_X86_ASM | |
483 // ordered per speed fasterst first | |
484 if(gCpuCaps.hasMMX2) | |
485 rgb32tobgr16_MMX2(src, dst, src_size); | |
486 else if(gCpuCaps.has3DNow) | |
487 rgb32tobgr16_3DNow(src, dst, src_size); | |
488 else if(gCpuCaps.hasMMX) | |
489 rgb32tobgr16_MMX(src, dst, src_size); | |
490 else | |
491 #endif | |
492 rgb32tobgr16_C(src, dst, src_size); | |
493 } | |
494 | |
495 void rgb32tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |
496 { | |
497 #ifdef CAN_COMPILE_X86_ASM | |
498 // ordered per speed fasterst first | |
499 if(gCpuCaps.hasMMX2) | |
500 rgb32tobgr15_MMX2(src, dst, src_size); | |
501 else if(gCpuCaps.has3DNow) | |
502 rgb32tobgr15_3DNow(src, dst, src_size); | |
503 else if(gCpuCaps.hasMMX) | |
504 rgb32tobgr15_MMX(src, dst, src_size); | |
505 else | |
506 #endif | |
507 rgb32tobgr15_C(src, dst, src_size); | |
508 } | 435 } |
509 | 436 |
510 void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) | 437 void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
511 { | 438 { |
512 unsigned i; | 439 unsigned i; |
515 dst[4*i + 0] = src[3*i + 2]; | 442 dst[4*i + 0] = src[3*i + 2]; |
516 dst[4*i + 1] = src[3*i + 1]; | 443 dst[4*i + 1] = src[3*i + 1]; |
517 dst[4*i + 2] = src[3*i + 0]; | 444 dst[4*i + 2] = src[3*i + 0]; |
518 dst[4*i + 3] = 0; | 445 dst[4*i + 3] = 0; |
519 } | 446 } |
520 } | |
521 | |
522 void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |
523 { | |
524 #ifdef CAN_COMPILE_X86_ASM | |
525 // ordered per speed fasterst first | |
526 if(gCpuCaps.hasMMX2) | |
527 rgb24tobgr24_MMX2(src, dst, src_size); | |
528 else if(gCpuCaps.has3DNow) | |
529 rgb24tobgr24_3DNow(src, dst, src_size); | |
530 else if(gCpuCaps.hasMMX) | |
531 rgb24tobgr24_MMX(src, dst, src_size); | |
532 else | |
533 #endif | |
534 rgb24tobgr24_C(src, dst, src_size); | |
535 } | |
536 | |
537 void rgb24tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |
538 { | |
539 #ifdef CAN_COMPILE_X86_ASM | |
540 // ordered per speed fasterst first | |
541 if(gCpuCaps.hasMMX2) | |
542 rgb24tobgr16_MMX2(src, dst, src_size); | |
543 else if(gCpuCaps.has3DNow) | |
544 rgb24tobgr16_3DNow(src, dst, src_size); | |
545 else if(gCpuCaps.hasMMX) | |
546 rgb24tobgr16_MMX(src, dst, src_size); | |
547 else | |
548 #endif | |
549 rgb24tobgr16_C(src, dst, src_size); | |
550 } | |
551 | |
552 void rgb24tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size) | |
553 { | |
554 #ifdef CAN_COMPILE_X86_ASM | |
555 // ordered per speed fasterst first | |
556 if(gCpuCaps.hasMMX2) | |
557 rgb24tobgr15_MMX2(src, dst, src_size); | |
558 else if(gCpuCaps.has3DNow) | |
559 rgb24tobgr15_3DNow(src, dst, src_size); | |
560 else if(gCpuCaps.hasMMX) | |
561 rgb24tobgr15_MMX(src, dst, src_size); | |
562 else | |
563 #endif | |
564 rgb24tobgr15_C(src, dst, src_size); | |
565 } | 447 } |
566 | 448 |
567 void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) | 449 void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
568 { | 450 { |
569 const uint16_t *end; | 451 const uint16_t *end; |
711 g = (rgb&0x38)>>3; | 593 g = (rgb&0x38)>>3; |
712 b = (rgb&0xC0)>>6; | 594 b = (rgb&0xC0)>>6; |
713 dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6); | 595 dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6); |
714 } | 596 } |
715 } | 597 } |
716 | |
717 /** | |
718 * | |
719 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
720 * problem for anyone then tell me, and ill fix it) | |
721 */ | |
722 void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
723 unsigned int width, unsigned int height, | |
724 int lumStride, int chromStride, int dstStride) | |
725 { | |
726 #ifdef CAN_COMPILE_X86_ASM | |
727 // ordered per speed fasterst first | |
728 if(gCpuCaps.hasMMX2) | |
729 yv12toyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
730 else if(gCpuCaps.has3DNow) | |
731 yv12toyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
732 else if(gCpuCaps.hasMMX) | |
733 yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
734 else | |
735 #endif | |
736 yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
737 } | |
738 | |
739 /** | |
740 * | |
741 * width should be a multiple of 16 | |
742 */ | |
743 void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
744 unsigned int width, unsigned int height, | |
745 int lumStride, int chromStride, int dstStride) | |
746 { | |
747 #ifdef CAN_COMPILE_X86_ASM | |
748 // ordered per speed fasterst first | |
749 if(gCpuCaps.hasMMX2) | |
750 yuv422ptoyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
751 else if(gCpuCaps.has3DNow) | |
752 yuv422ptoyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
753 else if(gCpuCaps.hasMMX) | |
754 yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
755 else | |
756 #endif | |
757 yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); | |
758 } | |
759 | |
760 /** | |
761 * | |
762 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
763 * problem for anyone then tell me, and ill fix it) | |
764 */ | |
765 void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
766 unsigned int width, unsigned int height, | |
767 int lumStride, int chromStride, int srcStride) | |
768 { | |
769 #ifdef CAN_COMPILE_X86_ASM | |
770 // ordered per speed fasterst first | |
771 if(gCpuCaps.hasMMX2) | |
772 yuy2toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
773 else if(gCpuCaps.has3DNow) | |
774 yuy2toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
775 else if(gCpuCaps.hasMMX) | |
776 yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
777 else | |
778 #endif | |
779 yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
780 } | |
781 | |
782 /** | |
783 * | |
784 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
785 * problem for anyone then tell me, and ill fix it) | |
786 * chrominance data is only taken from every secound line others are ignored FIXME write HQ version | |
787 */ | |
788 void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
789 unsigned int width, unsigned int height, | |
790 int lumStride, int chromStride, int srcStride) | |
791 { | |
792 #ifdef CAN_COMPILE_X86_ASM | |
793 // ordered per speed fasterst first | |
794 if(gCpuCaps.hasMMX2) | |
795 uyvytoyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
796 else if(gCpuCaps.has3DNow) | |
797 uyvytoyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
798 else if(gCpuCaps.hasMMX) | |
799 uyvytoyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
800 else | |
801 uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
802 #else | |
803 uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
804 #endif | |
805 } | |
806 | |
807 void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, | |
808 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
809 unsigned int width, unsigned int height, | |
810 int lumStride, int chromStride) | |
811 { | |
812 #ifdef CAN_COMPILE_X86_ASM | |
813 // ordered per speed fasterst first | |
814 if(gCpuCaps.hasMMX2) | |
815 yvu9toyv12_MMX2(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride); | |
816 else if(gCpuCaps.has3DNow) | |
817 yvu9toyv12_3DNow(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride); | |
818 else if(gCpuCaps.hasMMX) | |
819 yvu9toyv12_MMX(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride); | |
820 else | |
821 yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride); | |
822 #else | |
823 yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride); | |
824 #endif | |
825 } | |
826 | |
827 void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride) | |
828 { | |
829 #ifdef CAN_COMPILE_X86_ASM | |
830 // ordered per speed fasterst first | |
831 if(gCpuCaps.hasMMX2) | |
832 planar2x_MMX2(src, dst, width, height, srcStride, dstStride); | |
833 else if(gCpuCaps.has3DNow) | |
834 planar2x_3DNow(src, dst, width, height, srcStride, dstStride); | |
835 else | |
836 #endif | |
837 planar2x_C(src, dst, width, height, srcStride, dstStride); | |
838 } | |
839 | |
840 /** | |
841 * | |
842 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a | |
843 * problem for anyone then tell me, and ill fix it) | |
844 * chrominance data is only taken from every secound line others are ignored FIXME write HQ version | |
845 */ | |
846 void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
847 unsigned int width, unsigned int height, | |
848 int lumStride, int chromStride, int srcStride) | |
849 { | |
850 #ifdef CAN_COMPILE_X86_ASM | |
851 // ordered per speed fasterst first | |
852 if(gCpuCaps.hasMMX2) | |
853 rgb24toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
854 else if(gCpuCaps.has3DNow) | |
855 rgb24toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
856 else if(gCpuCaps.hasMMX) | |
857 rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
858 else | |
859 #endif | |
860 rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); | |
861 } | |
862 | |
863 void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, | |
864 unsigned width, unsigned height, int src1Stride, | |
865 int src2Stride, int dstStride) | |
866 { | |
867 #ifdef CAN_COMPILE_X86_ASM | |
868 // ordered per speed fasterst first | |
869 if(gCpuCaps.hasMMX2) | |
870 interleaveBytes_MMX2(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |
871 else if(gCpuCaps.has3DNow) | |
872 interleaveBytes_3DNow(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |
873 else if(gCpuCaps.hasMMX) | |
874 interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |
875 else | |
876 #endif | |
877 interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); | |
878 } | |
879 | |
880 void vu9_to_vu12(const uint8_t *src1, const uint8_t *src2, | |
881 uint8_t *dst1, uint8_t *dst2, | |
882 unsigned width, unsigned height, | |
883 int srcStride1, int srcStride2, | |
884 int dstStride1, int dstStride2) | |
885 { | |
886 #ifdef CAN_COMPILE_X86_ASM | |
887 if(gCpuCaps.hasMMX2) | |
888 vu9_to_vu12_MMX2(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2); | |
889 else if(gCpuCaps.has3DNow) | |
890 vu9_to_vu12_3DNow(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2); | |
891 else if(gCpuCaps.hasMMX) | |
892 vu9_to_vu12_MMX(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2); | |
893 else | |
894 #endif | |
895 vu9_to_vu12_C(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2); | |
896 } | |
897 | |
898 void yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, | |
899 uint8_t *dst, | |
900 unsigned width, unsigned height, | |
901 int srcStride1, int srcStride2, | |
902 int srcStride3, int dstStride) | |
903 { | |
904 #ifdef CAN_COMPILE_X86_ASM | |
905 if(gCpuCaps.hasMMX2) | |
906 yvu9_to_yuy2_MMX2(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride); | |
907 else if(gCpuCaps.has3DNow) | |
908 yvu9_to_yuy2_3DNow(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride); | |
909 else if(gCpuCaps.hasMMX) | |
910 yvu9_to_yuy2_MMX(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride); | |
911 else | |
912 #endif | |
913 yvu9_to_yuy2_C(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride); | |
914 } |