comparison postproc/rgb2rgb.c @ 9987:988c2ffc5bc1

remove remaining cpudetect dependency
author michael
date Fri, 25 Apr 2003 17:16:55 +0000
parents 50ef22bcc0c3
children f33f908ae085
comparison
equal deleted inserted replaced
9986:4bdd248d372e 9987:988c2ffc5bc1
8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL)
9 */ 9 */
10 #include <inttypes.h> 10 #include <inttypes.h>
11 #include "../config.h" 11 #include "../config.h"
12 #include "rgb2rgb.h" 12 #include "rgb2rgb.h"
13 #include "../cpudetect.h" 13 #include "swscale.h"
14 #include "../mangle.h" 14 #include "../mangle.h"
15 #include "../bswap.h" 15 #include "../bswap.h"
16 #include "../libvo/fastmemcpy.h" 16 #include "../libvo/fastmemcpy.h"
17 17
18 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
19
20 void (*rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned src_size);
21 void (*rgb24to16)(const uint8_t *src,uint8_t *dst,unsigned src_size);
22 void (*rgb24to15)(const uint8_t *src,uint8_t *dst,unsigned src_size);
23 void (*rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned src_size);
24 void (*rgb32to16)(const uint8_t *src,uint8_t *dst,unsigned src_size);
25 void (*rgb32to15)(const uint8_t *src,uint8_t *dst,unsigned src_size);
26 void (*rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size);
27 void (*rgb15to24)(const uint8_t *src,uint8_t *dst,unsigned src_size);
28 void (*rgb15to32)(const uint8_t *src,uint8_t *dst,unsigned src_size);
29 void (*rgb16to15)(const uint8_t *src,uint8_t *dst,unsigned src_size);
30 void (*rgb16to24)(const uint8_t *src,uint8_t *dst,unsigned src_size);
31 void (*rgb16to32)(const uint8_t *src,uint8_t *dst,unsigned src_size);
32 //void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, unsigned src_size);
33 void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned src_size);
34 void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, unsigned src_size);
35 void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size);
36 void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned src_size);
37 //void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, unsigned src_size);
38 void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, unsigned src_size);
39 void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size);
40
41 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
42 unsigned int width, unsigned int height,
43 int lumStride, int chromStride, int dstStride);
44 void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
45 unsigned int width, unsigned int height,
46 int lumStride, int chromStride, int dstStride);
47 void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
48 unsigned int width, unsigned int height,
49 int lumStride, int chromStride, int srcStride);
50 void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
51 unsigned int width, unsigned int height,
52 int lumStride, int chromStride, int srcStride);
53 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride);
54 void (*interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst,
55 unsigned width, unsigned height, int src1Stride,
56 int src2Stride, int dstStride);
57 void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
58 uint8_t *dst1, uint8_t *dst2,
59 unsigned width, unsigned height,
60 int srcStride1, int srcStride2,
61 int dstStride1, int dstStride2);
62 void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
63 uint8_t *dst,
64 unsigned width, unsigned height,
65 int srcStride1, int srcStride2,
66 int srcStride3, int dstStride);
67
18 #ifdef ARCH_X86 68 #ifdef ARCH_X86
19 #define CAN_COMPILE_X86_ASM
20 #endif
21
22 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
23
24 #ifdef CAN_COMPILE_X86_ASM
25 static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL; 69 static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL;
26 static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; 70 static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
27 static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL; 71 static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL;
28 static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; 72 static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
29 static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; 73 static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
104 #undef ARCH_X86 148 #undef ARCH_X86
105 #undef HAVE_SSE2 149 #undef HAVE_SSE2
106 #define RENAME(a) a ## _C 150 #define RENAME(a) a ## _C
107 #include "rgb2rgb_template.c" 151 #include "rgb2rgb_template.c"
108 152
109 #ifdef CAN_COMPILE_X86_ASM 153 #ifdef ARCH_X86
110 154
111 //MMX versions 155 //MMX versions
112 #undef RENAME 156 #undef RENAME
113 #define HAVE_MMX 157 #define HAVE_MMX
114 #undef HAVE_MMX2 158 #undef HAVE_MMX2
136 #undef HAVE_SSE2 180 #undef HAVE_SSE2
137 #define ARCH_X86 181 #define ARCH_X86
138 #define RENAME(a) a ## _3DNow 182 #define RENAME(a) a ## _3DNow
139 #include "rgb2rgb_template.c" 183 #include "rgb2rgb_template.c"
140 184
141 #endif //CAN_COMPILE_X86_ASM 185 #endif //ARCH_X86
142
143 void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
144 {
145 #ifdef CAN_COMPILE_X86_ASM
146 // ordered per speed fastest first
147 if(gCpuCaps.hasMMX2)
148 rgb24to32_MMX2(src, dst, src_size);
149 else if(gCpuCaps.has3DNow)
150 rgb24to32_3DNow(src, dst, src_size);
151 else if(gCpuCaps.hasMMX)
152 rgb24to32_MMX(src, dst, src_size);
153 else
154 #endif
155 rgb24to32_C(src, dst, src_size);
156 }
157
158 void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
159 {
160 #ifdef CAN_COMPILE_X86_ASM
161 // ordered per speed fastest first
162 if(gCpuCaps.hasMMX2)
163 rgb15to24_MMX2(src, dst, src_size);
164 else if(gCpuCaps.has3DNow)
165 rgb15to24_3DNow(src, dst, src_size);
166 else if(gCpuCaps.hasMMX)
167 rgb15to24_MMX(src, dst, src_size);
168 else
169 #endif
170 rgb15to24_C(src, dst, src_size);
171 }
172
173 void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
174 {
175 #ifdef CAN_COMPILE_X86_ASM
176 // ordered per speed fastest first
177 if(gCpuCaps.hasMMX2)
178 rgb16to24_MMX2(src, dst, src_size);
179 else if(gCpuCaps.has3DNow)
180 rgb16to24_3DNow(src, dst, src_size);
181 else if(gCpuCaps.hasMMX)
182 rgb16to24_MMX(src, dst, src_size);
183 else
184 #endif
185 rgb16to24_C(src, dst, src_size);
186 }
187
188 void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
189 {
190 #ifdef CAN_COMPILE_X86_ASM
191 // ordered per speed fastest first
192 if(gCpuCaps.hasMMX2)
193 rgb15to32_MMX2(src, dst, src_size);
194 else if(gCpuCaps.has3DNow)
195 rgb15to32_3DNow(src, dst, src_size);
196 else if(gCpuCaps.hasMMX)
197 rgb15to32_MMX(src, dst, src_size);
198 else
199 #endif
200 rgb15to32_C(src, dst, src_size);
201 }
202
203 void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
204 {
205 #ifdef CAN_COMPILE_X86_ASM
206 // ordered per speed fastest first
207 if(gCpuCaps.hasMMX2)
208 rgb16to32_MMX2(src, dst, src_size);
209 else if(gCpuCaps.has3DNow)
210 rgb16to32_3DNow(src, dst, src_size);
211 else if(gCpuCaps.hasMMX)
212 rgb16to32_MMX(src, dst, src_size);
213 else
214 #endif
215 rgb16to32_C(src, dst, src_size);
216 }
217
218 void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
219 {
220 #ifdef CAN_COMPILE_X86_ASM
221 // ordered per speed fastest first
222 if(gCpuCaps.hasMMX2)
223 rgb32to24_MMX2(src, dst, src_size);
224 else if(gCpuCaps.has3DNow)
225 rgb32to24_3DNow(src, dst, src_size);
226 else if(gCpuCaps.hasMMX)
227 rgb32to24_MMX(src, dst, src_size);
228 else
229 #endif
230 rgb32to24_C(src, dst, src_size);
231 }
232 186
233 /* 187 /*
234 Original by Strepto/Astral 188 rgb15->rgb16 Original by Strepto/Astral
235 ported to gcc & bugfixed : A'rpi 189 ported to gcc & bugfixed : A'rpi
236 MMX2, 3DNOW optimization by Nick Kurshev 190 MMX2, 3DNOW optimization by Nick Kurshev
237 32bit c version, and and&add trick by Michael Niedermayer 191 32bit c version, and and&add trick by Michael Niedermayer
238 */ 192 */
239 void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size) 193
240 { 194 void sws_rgb2rgb_init(int flags){
241 #ifdef CAN_COMPILE_X86_ASM 195 #ifdef ARCH_X86
242 // ordered per speed fastest first 196 if(flags & SWS_CPU_CAPS_MMX2){
243 if(gCpuCaps.hasMMX2) 197 rgb15to16= rgb15to16_MMX2;
244 rgb15to16_MMX2(src, dst, src_size); 198 rgb15to24= rgb15to24_MMX2;
245 else if(gCpuCaps.has3DNow) 199 rgb15to32= rgb15to32_MMX2;
246 rgb15to16_3DNow(src, dst, src_size); 200 rgb16to24= rgb16to24_MMX2;
247 else if(gCpuCaps.hasMMX) 201 rgb16to32= rgb16to32_MMX2;
248 rgb15to16_MMX(src, dst, src_size); 202 rgb16to15= rgb16to15_MMX2;
249 else 203 rgb24to16= rgb24to16_MMX2;
204 rgb24to15= rgb24to15_MMX2;
205 rgb24to32= rgb24to32_MMX2;
206 rgb32to16= rgb32to16_MMX2;
207 rgb32to15= rgb32to15_MMX2;
208 rgb32to24= rgb32to24_MMX2;
209 rgb24tobgr15= rgb24tobgr15_MMX2;
210 rgb24tobgr16= rgb24tobgr16_MMX2;
211 rgb24tobgr24= rgb24tobgr24_MMX2;
212 rgb32tobgr32= rgb32tobgr32_MMX2;
213 rgb32tobgr16= rgb32tobgr16_MMX2;
214 rgb32tobgr15= rgb32tobgr15_MMX2;
215 yv12toyuy2= yv12toyuy2_MMX2;
216 yuv422ptoyuy2= yuv422ptoyuy2_MMX2;
217 yuy2toyv12= yuy2toyv12_MMX2;
218 uyvytoyv12= uyvytoyv12_MMX2;
219 yvu9toyv12= yvu9toyv12_MMX2;
220 planar2x= planar2x_MMX2;
221 rgb24toyv12= rgb24toyv12_MMX2;
222 interleaveBytes= interleaveBytes_MMX2;
223 vu9_to_vu12= vu9_to_vu12_MMX2;
224 yvu9_to_yuy2= yvu9_to_yuy2_MMX2;
225 }else if(flags & SWS_CPU_CAPS_3DNOW){
226 rgb15to16= rgb15to16_3DNOW;
227 rgb15to24= rgb15to24_3DNOW;
228 rgb15to32= rgb15to32_3DNOW;
229 rgb16to24= rgb16to24_3DNOW;
230 rgb16to32= rgb16to32_3DNOW;
231 rgb16to15= rgb16to15_3DNOW;
232 rgb24to16= rgb24to16_3DNOW;
233 rgb24to15= rgb24to15_3DNOW;
234 rgb24to32= rgb24to32_3DNOW;
235 rgb32to16= rgb32to16_3DNOW;
236 rgb32to15= rgb32to15_3DNOW;
237 rgb32to24= rgb32to24_3DNOW;
238 rgb24tobgr15= rgb24tobgr15_3DNOW;
239 rgb24tobgr16= rgb24tobgr16_3DNOW;
240 rgb24tobgr24= rgb24tobgr24_3DNOW;
241 rgb32tobgr32= rgb32tobgr32_3DNOW;
242 rgb32tobgr16= rgb32tobgr16_3DNOW;
243 rgb32tobgr15= rgb32tobgr15_3DNOW;
244 yv12toyuy2= yv12toyuy2_3DNOW;
245 yuv422ptoyuy2= yuv422ptoyuy2_3DNOW;
246 yuy2toyv12= yuy2toyv12_3DNOW;
247 uyvytoyv12= uyvytoyv12_3DNOW;
248 yvu9toyv12= yvu9toyv12_3DNOW;
249 planar2x= planar2x_3DNOW;
250 rgb24toyv12= rgb24toyv12_3DNOW;
251 interleaveBytes= interleaveBytes_3DNOW;
252 vu9_to_vu12= vu9_to_vu12_3DNOW;
253 yvu9_to_yuy2= yvu9_to_yuy2_3DNOW;
254 }else if(flags & SWS_CPU_CAPS_MMX){
255 rgb15to16= rgb15to16_MMX;
256 rgb15to24= rgb15to24_MMX;
257 rgb15to32= rgb15to32_MMX;
258 rgb16to24= rgb16to24_MMX;
259 rgb16to32= rgb16to32_MMX;
260 rgb16to15= rgb16to15_MMX;
261 rgb24to16= rgb24to16_MMX;
262 rgb24to15= rgb24to15_MMX;
263 rgb24to32= rgb24to32_MMX;
264 rgb32to16= rgb32to16_MMX;
265 rgb32to15= rgb32to15_MMX;
266 rgb32to24= rgb32to24_MMX;
267 rgb24tobgr15= rgb24tobgr15_MMX;
268 rgb24tobgr16= rgb24tobgr16_MMX;
269 rgb24tobgr24= rgb24tobgr24_MMX;
270 rgb32tobgr32= rgb32tobgr32_MMX;
271 rgb32tobgr16= rgb32tobgr16_MMX;
272 rgb32tobgr15= rgb32tobgr15_MMX;
273 yv12toyuy2= yv12toyuy2_MMX;
274 yuv422ptoyuy2= yuv422ptoyuy2_MMX;
275 yuy2toyv12= yuy2toyv12_MMX;
276 uyvytoyv12= uyvytoyv12_MMX;
277 yvu9toyv12= yvu9toyv12_MMX;
278 planar2x= planar2x_MMX;
279 rgb24toyv12= rgb24toyv12_MMX;
280 interleaveBytes= interleaveBytes_MMX;
281 vu9_to_vu12= vu9_to_vu12_MMX;
282 yvu9_to_yuy2= yvu9_to_yuy2_MMX;
283 }else
250 #endif 284 #endif
251 rgb15to16_C(src, dst, src_size); 285 {
252 } 286 rgb15to16= rgb15to16_C;
253 287 rgb15to24= rgb15to24_C;
254 void rgb16to15(const uint8_t *src,uint8_t *dst,unsigned src_size) 288 rgb15to32= rgb15to32_C;
255 { 289 rgb16to24= rgb16to24_C;
256 #ifdef CAN_COMPILE_X86_ASM 290 rgb16to32= rgb16to32_C;
257 // ordered per speed fastest first 291 rgb16to15= rgb16to15_C;
258 if(gCpuCaps.hasMMX2) 292 rgb24to16= rgb24to16_C;
259 rgb16to15_MMX2(src, dst, src_size); 293 rgb24to15= rgb24to15_C;
260 else if(gCpuCaps.has3DNow) 294 rgb24to32= rgb24to32_C;
261 rgb16to15_3DNow(src, dst, src_size); 295 rgb32to16= rgb32to16_C;
262 else if(gCpuCaps.hasMMX) 296 rgb32to15= rgb32to15_C;
263 rgb16to15_MMX(src, dst, src_size); 297 rgb32to24= rgb32to24_C;
264 else 298 rgb24tobgr15= rgb24tobgr15_C;
265 #endif 299 rgb24tobgr16= rgb24tobgr16_C;
266 rgb16to15_C(src, dst, src_size); 300 rgb24tobgr24= rgb24tobgr24_C;
267 } 301 rgb32tobgr32= rgb32tobgr32_C;
302 rgb32tobgr16= rgb32tobgr16_C;
303 rgb32tobgr15= rgb32tobgr15_C;
304 yv12toyuy2= yv12toyuy2_C;
305 yuv422ptoyuy2= yuv422ptoyuy2_C;
306 yuy2toyv12= yuy2toyv12_C;
307 // uyvytoyv12= uyvytoyv12_C;
308 // yvu9toyv12= yvu9toyv12_C;
309 planar2x= planar2x_C;
310 rgb24toyv12= rgb24toyv12_C;
311 interleaveBytes= interleaveBytes_C;
312 vu9_to_vu12= vu9_to_vu12_C;
313 yvu9_to_yuy2= yvu9_to_yuy2_C;
314 }
315 }
316
268 /** 317 /**
269 * Palette is assumed to contain bgr32 318 * Palette is assumed to contain bgr32
270 */ 319 */
271 void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) 320 void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
272 { 321 {
339 dst[2]= palette[ src[i]*4+2 ]; 388 dst[2]= palette[ src[i]*4+2 ];
340 dst+= 3; 389 dst+= 3;
341 } 390 }
342 } 391 }
343 392
344 void bgr24torgb24(const uint8_t *src, uint8_t *dst, unsigned src_size)
345 {
346 #ifdef CAN_COMPILE_X86_ASM
347 // ordered per speed fastest first
348 if(gCpuCaps.hasMMX2)
349 bgr24torgb24_MMX2(src, dst, src_size);
350 else if(gCpuCaps.has3DNow)
351 bgr24torgb24_3DNow(src, dst, src_size);
352 else if(gCpuCaps.hasMMX)
353 bgr24torgb24_MMX(src, dst, src_size);
354 else
355 bgr24torgb24_C(src, dst, src_size);
356 #else
357 bgr24torgb24_C(src, dst, src_size);
358 #endif
359 }
360
361 void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
362 {
363 #ifdef CAN_COMPILE_X86_ASM
364 // ordered per speed fastest first
365 if(gCpuCaps.hasMMX2)
366 rgb32to16_MMX2(src, dst, src_size);
367 else if(gCpuCaps.has3DNow)
368 rgb32to16_3DNow(src, dst, src_size);
369 else if(gCpuCaps.hasMMX)
370 rgb32to16_MMX(src, dst, src_size);
371 else
372 #endif
373 rgb32to16_C(src, dst, src_size);
374 }
375
376 void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
377 {
378 #ifdef CAN_COMPILE_X86_ASM
379 // ordered per speed fastest first
380 if(gCpuCaps.hasMMX2)
381 rgb32to15_MMX2(src, dst, src_size);
382 else if(gCpuCaps.has3DNow)
383 rgb32to15_3DNow(src, dst, src_size);
384 else if(gCpuCaps.hasMMX)
385 rgb32to15_MMX(src, dst, src_size);
386 else
387 #endif
388 rgb32to15_C(src, dst, src_size);
389 }
390
391 void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
392 {
393 #ifdef CAN_COMPILE_X86_ASM
394 // ordered per speed fastest first
395 if(gCpuCaps.hasMMX2)
396 rgb24to16_MMX2(src, dst, src_size);
397 else if(gCpuCaps.has3DNow)
398 rgb24to16_3DNow(src, dst, src_size);
399 else if(gCpuCaps.hasMMX)
400 rgb24to16_MMX(src, dst, src_size);
401 else
402 #endif
403 rgb24to16_C(src, dst, src_size);
404 }
405
406 void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
407 {
408 #ifdef CAN_COMPILE_X86_ASM
409 // ordered per speed fastest first
410 if(gCpuCaps.hasMMX2)
411 rgb24to15_MMX2(src, dst, src_size);
412 else if(gCpuCaps.has3DNow)
413 rgb24to15_3DNow(src, dst, src_size);
414 else if(gCpuCaps.hasMMX)
415 rgb24to15_MMX(src, dst, src_size);
416 else
417 #endif
418 rgb24to15_C(src, dst, src_size);
419 }
420
421 /** 393 /**
422 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette 394 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette
423 */ 395 */
424 void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) 396 void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
425 { 397 {
448 unsigned i; 420 unsigned i;
449 for(i=0; i<num_pixels; i++) 421 for(i=0; i<num_pixels; i++)
450 ((uint16_t *)dst)[i] = bswap_16(((uint16_t *)palette)[ src[i] ]); 422 ((uint16_t *)dst)[i] = bswap_16(((uint16_t *)palette)[ src[i] ]);
451 } 423 }
452 424
453 void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
454 {
455 #ifdef CAN_COMPILE_X86_ASM
456 // ordered per speed fastest first
457 if(gCpuCaps.hasMMX2)
458 rgb32tobgr32_MMX2(src, dst, src_size);
459 else if(gCpuCaps.has3DNow)
460 rgb32tobgr32_3DNow(src, dst, src_size);
461 else if(gCpuCaps.hasMMX)
462 rgb32tobgr32_MMX(src, dst, src_size);
463 else
464 #endif
465 rgb32tobgr32_C(src, dst, src_size);
466 }
467
468 void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) 425 void rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
469 { 426 {
470 unsigned i; 427 unsigned i;
471 unsigned num_pixels = src_size >> 2; 428 unsigned num_pixels = src_size >> 2;
472 for(i=0; i<num_pixels; i++) 429 for(i=0; i<num_pixels; i++)
473 { 430 {
474 dst[3*i + 0] = src[4*i + 2]; 431 dst[3*i + 0] = src[4*i + 2];
475 dst[3*i + 1] = src[4*i + 1]; 432 dst[3*i + 1] = src[4*i + 1];
476 dst[3*i + 2] = src[4*i + 0]; 433 dst[3*i + 2] = src[4*i + 0];
477 } 434 }
478 }
479
480 void rgb32tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
481 {
482 #ifdef CAN_COMPILE_X86_ASM
483 // ordered per speed fastest first
484 if(gCpuCaps.hasMMX2)
485 rgb32tobgr16_MMX2(src, dst, src_size);
486 else if(gCpuCaps.has3DNow)
487 rgb32tobgr16_3DNow(src, dst, src_size);
488 else if(gCpuCaps.hasMMX)
489 rgb32tobgr16_MMX(src, dst, src_size);
490 else
491 #endif
492 rgb32tobgr16_C(src, dst, src_size);
493 }
494
495 void rgb32tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
496 {
497 #ifdef CAN_COMPILE_X86_ASM
498 // ordered per speed fastest first
499 if(gCpuCaps.hasMMX2)
500 rgb32tobgr15_MMX2(src, dst, src_size);
501 else if(gCpuCaps.has3DNow)
502 rgb32tobgr15_3DNow(src, dst, src_size);
503 else if(gCpuCaps.hasMMX)
504 rgb32tobgr15_MMX(src, dst, src_size);
505 else
506 #endif
507 rgb32tobgr15_C(src, dst, src_size);
508 } 435 }
509 436
510 void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) 437 void rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
511 { 438 {
512 unsigned i; 439 unsigned i;
515 dst[4*i + 0] = src[3*i + 2]; 442 dst[4*i + 0] = src[3*i + 2];
516 dst[4*i + 1] = src[3*i + 1]; 443 dst[4*i + 1] = src[3*i + 1];
517 dst[4*i + 2] = src[3*i + 0]; 444 dst[4*i + 2] = src[3*i + 0];
518 dst[4*i + 3] = 0; 445 dst[4*i + 3] = 0;
519 } 446 }
520 }
521
522 void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
523 {
524 #ifdef CAN_COMPILE_X86_ASM
525 // ordered per speed fastest first
526 if(gCpuCaps.hasMMX2)
527 rgb24tobgr24_MMX2(src, dst, src_size);
528 else if(gCpuCaps.has3DNow)
529 rgb24tobgr24_3DNow(src, dst, src_size);
530 else if(gCpuCaps.hasMMX)
531 rgb24tobgr24_MMX(src, dst, src_size);
532 else
533 #endif
534 rgb24tobgr24_C(src, dst, src_size);
535 }
536
537 void rgb24tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size)
538 {
539 #ifdef CAN_COMPILE_X86_ASM
540 // ordered per speed fastest first
541 if(gCpuCaps.hasMMX2)
542 rgb24tobgr16_MMX2(src, dst, src_size);
543 else if(gCpuCaps.has3DNow)
544 rgb24tobgr16_3DNow(src, dst, src_size);
545 else if(gCpuCaps.hasMMX)
546 rgb24tobgr16_MMX(src, dst, src_size);
547 else
548 #endif
549 rgb24tobgr16_C(src, dst, src_size);
550 }
551
552 void rgb24tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size)
553 {
554 #ifdef CAN_COMPILE_X86_ASM
555 // ordered per speed fastest first
556 if(gCpuCaps.hasMMX2)
557 rgb24tobgr15_MMX2(src, dst, src_size);
558 else if(gCpuCaps.has3DNow)
559 rgb24tobgr15_3DNow(src, dst, src_size);
560 else if(gCpuCaps.hasMMX)
561 rgb24tobgr15_MMX(src, dst, src_size);
562 else
563 #endif
564 rgb24tobgr15_C(src, dst, src_size);
565 } 447 }
566 448
567 void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) 449 void rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
568 { 450 {
569 const uint16_t *end; 451 const uint16_t *end;
711 g = (rgb&0x38)>>3; 593 g = (rgb&0x38)>>3;
712 b = (rgb&0xC0)>>6; 594 b = (rgb&0xC0)>>6;
713 dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6); 595 dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6);
714 } 596 }
715 } 597 }
716
717 /**
718 *
719 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
720 * problem for anyone then tell me, and I'll fix it)
721 */
722 void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
723 unsigned int width, unsigned int height,
724 int lumStride, int chromStride, int dstStride)
725 {
726 #ifdef CAN_COMPILE_X86_ASM
727 // ordered per speed fastest first
728 if(gCpuCaps.hasMMX2)
729 yv12toyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
730 else if(gCpuCaps.has3DNow)
731 yv12toyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
732 else if(gCpuCaps.hasMMX)
733 yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
734 else
735 #endif
736 yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
737 }
738
739 /**
740 *
741 * width should be a multiple of 16
742 */
743 void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
744 unsigned int width, unsigned int height,
745 int lumStride, int chromStride, int dstStride)
746 {
747 #ifdef CAN_COMPILE_X86_ASM
748 // ordered per speed fastest first
749 if(gCpuCaps.hasMMX2)
750 yuv422ptoyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
751 else if(gCpuCaps.has3DNow)
752 yuv422ptoyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
753 else if(gCpuCaps.hasMMX)
754 yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
755 else
756 #endif
757 yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
758 }
759
760 /**
761 *
762 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
763 * problem for anyone then tell me, and I'll fix it)
764 */
765 void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
766 unsigned int width, unsigned int height,
767 int lumStride, int chromStride, int srcStride)
768 {
769 #ifdef CAN_COMPILE_X86_ASM
770 // ordered per speed fastest first
771 if(gCpuCaps.hasMMX2)
772 yuy2toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
773 else if(gCpuCaps.has3DNow)
774 yuy2toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
775 else if(gCpuCaps.hasMMX)
776 yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
777 else
778 #endif
779 yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
780 }
781
782 /**
783 *
784 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
785 * problem for anyone then tell me, and I'll fix it)
786 * chrominance data is only taken from every second line, others are ignored FIXME write HQ version
787 */
788 void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
789 unsigned int width, unsigned int height,
790 int lumStride, int chromStride, int srcStride)
791 {
792 #ifdef CAN_COMPILE_X86_ASM
793 // ordered per speed fastest first
794 if(gCpuCaps.hasMMX2)
795 uyvytoyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
796 else if(gCpuCaps.has3DNow)
797 uyvytoyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
798 else if(gCpuCaps.hasMMX)
799 uyvytoyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
800 else
801 uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
802 #else
803 uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
804 #endif
805 }
806
807 void yvu9toyv12(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
808 uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
809 unsigned int width, unsigned int height,
810 int lumStride, int chromStride)
811 {
812 #ifdef CAN_COMPILE_X86_ASM
813 // ordered per speed fastest first
814 if(gCpuCaps.hasMMX2)
815 yvu9toyv12_MMX2(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
816 else if(gCpuCaps.has3DNow)
817 yvu9toyv12_3DNow(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
818 else if(gCpuCaps.hasMMX)
819 yvu9toyv12_MMX(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
820 else
821 yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
822 #else
823 yvu9toyv12_C(ysrc, usrc, vsrc, ydst, udst, vdst, width, height, lumStride, chromStride);
824 #endif
825 }
826
827 void planar2x(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
828 {
829 #ifdef CAN_COMPILE_X86_ASM
830 // ordered per speed fastest first
831 if(gCpuCaps.hasMMX2)
832 planar2x_MMX2(src, dst, width, height, srcStride, dstStride);
833 else if(gCpuCaps.has3DNow)
834 planar2x_3DNow(src, dst, width, height, srcStride, dstStride);
835 else
836 #endif
837 planar2x_C(src, dst, width, height, srcStride, dstStride);
838 }
839
840 /**
841 *
842 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
843 * problem for anyone then tell me, and I'll fix it)
844 * chrominance data is only taken from every second line, others are ignored FIXME write HQ version
845 */
846 void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
847 unsigned int width, unsigned int height,
848 int lumStride, int chromStride, int srcStride)
849 {
850 #ifdef CAN_COMPILE_X86_ASM
851 // ordered per speed fastest first
852 if(gCpuCaps.hasMMX2)
853 rgb24toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
854 else if(gCpuCaps.has3DNow)
855 rgb24toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
856 else if(gCpuCaps.hasMMX)
857 rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
858 else
859 #endif
860 rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
861 }
862
863 void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
864 unsigned width, unsigned height, int src1Stride,
865 int src2Stride, int dstStride)
866 {
867 #ifdef CAN_COMPILE_X86_ASM
868 // ordered per speed fastest first
869 if(gCpuCaps.hasMMX2)
870 interleaveBytes_MMX2(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
871 else if(gCpuCaps.has3DNow)
872 interleaveBytes_3DNow(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
873 else if(gCpuCaps.hasMMX)
874 interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
875 else
876 #endif
877 interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
878 }
879
880 void vu9_to_vu12(const uint8_t *src1, const uint8_t *src2,
881 uint8_t *dst1, uint8_t *dst2,
882 unsigned width, unsigned height,
883 int srcStride1, int srcStride2,
884 int dstStride1, int dstStride2)
885 {
886 #ifdef CAN_COMPILE_X86_ASM
887 if(gCpuCaps.hasMMX2)
888 vu9_to_vu12_MMX2(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
889 else if(gCpuCaps.has3DNow)
890 vu9_to_vu12_3DNow(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
891 else if(gCpuCaps.hasMMX)
892 vu9_to_vu12_MMX(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
893 else
894 #endif
895 vu9_to_vu12_C(src1, src2, dst1, dst2, width, height, srcStride1, srcStride2, dstStride1, dstStride2);
896 }
897
898 void yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
899 uint8_t *dst,
900 unsigned width, unsigned height,
901 int srcStride1, int srcStride2,
902 int srcStride3, int dstStride)
903 {
904 #ifdef CAN_COMPILE_X86_ASM
905 if(gCpuCaps.hasMMX2)
906 yvu9_to_yuy2_MMX2(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
907 else if(gCpuCaps.has3DNow)
908 yvu9_to_yuy2_3DNow(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
909 else if(gCpuCaps.hasMMX)
910 yvu9_to_yuy2_MMX(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
911 else
912 #endif
913 yvu9_to_yuy2_C(src1, src2, src3, dst, width, height, srcStride1, srcStride2, srcStride3, dstStride);
914 }