annotate postproc/rgb2rgb.c @ 5630:dfc219577da6

FPS calculation fixed
author arpi
date Mon, 15 Apr 2002 02:48:11 +0000
parents f0fa3373f616
children c5cf988c6d6f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
1 /*
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
2 *
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
3 * rgb2rgb.c, Software RGB to RGB convertor
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents: 2725
diff changeset
4 * pluralize by Software PAL8 to RGB convertor
ae79207a3055 Move yuv2rgb to postprocess
nick
parents: 2725
diff changeset
5 * Software YUV to YUV convertor
ae79207a3055 Move yuv2rgb to postprocess
nick
parents: 2725
diff changeset
6 * Software YUV to RGB convertor
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
7 * Written by Nick Kurshev.
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL)
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
9 */
2504
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
10 #include <inttypes.h>
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
11 #include "../config.h"
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
12 #include "rgb2rgb.h"
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
13 #include "../cpudetect.h"
4923
3cc0f4938be1 add mangling
atmos4
parents: 4622
diff changeset
14 #include "../mangle.h"
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
15
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
16 #ifdef ARCH_X86
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
17 #define CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
18 #endif
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
19
4622
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
20 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
21
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
22 #ifdef CAN_COMPILE_X86_ASM
2755
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
23 static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL;
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
24 static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
25 static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
26 static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
5582
21bd4b32abb4 rgb24->bgr24
michael
parents: 5338
diff changeset
27 static const uint64_t mask24b __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL;
21bd4b32abb4 rgb24->bgr24
michael
parents: 5338
diff changeset
28 static const uint64_t mask24g __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL;
21bd4b32abb4 rgb24->bgr24
michael
parents: 5338
diff changeset
29 static const uint64_t mask24r __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL;
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
30 static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
31 static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
2746
dece635a28e3 Minor speedup of rgb32to24. (performance is not successful)
nick
parents: 2741
diff changeset
32 static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL;
dece635a28e3 Minor speedup of rgb32to24. (performance is not successful)
nick
parents: 2741
diff changeset
33 static const uint64_t mask24hhh __attribute__((aligned(8))) = 0xffffffff00000000ULL;
dece635a28e3 Minor speedup of rgb32to24. (performance is not successful)
nick
parents: 2741
diff changeset
34 static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff0000ULL;
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
35 static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
36 static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
2698
22652c028692 faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents: 2697
diff changeset
37 static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
38 static const uint64_t red_16mask __attribute__((aligned(8))) = 0x0000f8000000f800ULL;
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
39 static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL;
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
40 static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
41 static const uint64_t red_15mask __attribute__((aligned(8))) = 0x00007c000000f800ULL;
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
42 static const uint64_t green_15mask __attribute__((aligned(8)))= 0x000003e0000007e0ULL;
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
43 static const uint64_t blue_15mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;
4622
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
44
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
45 #ifdef FAST_BGR2YV12
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
46 static const uint64_t bgr2YCoeff __attribute__((aligned(8))) = 0x000000210041000DULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
47 static const uint64_t bgr2UCoeff __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
48 static const uint64_t bgr2VCoeff __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
49 #else
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
50 static const uint64_t bgr2YCoeff __attribute__((aligned(8))) = 0x000020E540830C8BULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
51 static const uint64_t bgr2UCoeff __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
52 static const uint64_t bgr2VCoeff __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
53 #endif
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
54 static const uint64_t bgr2YOffset __attribute__((aligned(8))) = 0x1010101010101010ULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
55 static const uint64_t bgr2UVOffset __attribute__((aligned(8)))= 0x8080808080808080ULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
56 static const uint64_t w1111 __attribute__((aligned(8))) = 0x0001000100010001ULL;
e3a9fae516e4 rgb24toyv12 in MMX (untested)
michael
parents: 3132
diff changeset
57
2755
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
58 #if 0
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
59 static volatile uint64_t __attribute__((aligned(8))) b5Dither;
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
60 static volatile uint64_t __attribute__((aligned(8))) g5Dither;
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
61 static volatile uint64_t __attribute__((aligned(8))) g6Dither;
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
62 static volatile uint64_t __attribute__((aligned(8))) r5Dither;
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
63
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
64 static uint64_t __attribute__((aligned(8))) dither4[2]={
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
65 0x0103010301030103LL,
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
66 0x0200020002000200LL,};
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
67
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
68 static uint64_t __attribute__((aligned(8))) dither8[2]={
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
69 0x0602060206020602LL,
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
70 0x0004000400040004LL,};
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
71 #endif
2535
b44113f46c96 cant compile on non x86 bugfix
michael
parents: 2517
diff changeset
72 #endif
2513
nick
parents: 2512
diff changeset
73
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
74 #define RGB2YUV_SHIFT 8
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
75 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
76 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
77 #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
78 #define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
79 #define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
80 #define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
81 #define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
82 #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
83 #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
84
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
85 //Note: we have C, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
86 //Plain C versions
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
87 #undef HAVE_MMX
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
88 #undef HAVE_MMX2
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
89 #undef HAVE_3DNOW
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
90 #undef ARCH_X86
5338
michael
parents: 5337
diff changeset
91 #undef HAVE_SSE2
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
92 #define RENAME(a) a ## _C
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
93 #include "rgb2rgb_template.c"
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
94
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
95 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
96
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
97 //MMX versions
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
98 #undef RENAME
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
99 #define HAVE_MMX
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
100 #undef HAVE_MMX2
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
101 #undef HAVE_3DNOW
5338
michael
parents: 5337
diff changeset
102 #undef HAVE_SSE2
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
103 #define ARCH_X86
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
104 #define RENAME(a) a ## _MMX
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
105 #include "rgb2rgb_template.c"
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
106
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
107 //MMX2 versions
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
108 #undef RENAME
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
109 #define HAVE_MMX
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
110 #define HAVE_MMX2
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
111 #undef HAVE_3DNOW
5338
michael
parents: 5337
diff changeset
112 #undef HAVE_SSE2
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
113 #define ARCH_X86
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
114 #define RENAME(a) a ## _MMX2
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
115 #include "rgb2rgb_template.c"
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
116
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
117 //3DNOW versions
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
118 #undef RENAME
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
119 #define HAVE_MMX
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
120 #undef HAVE_MMX2
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
121 #define HAVE_3DNOW
5338
michael
parents: 5337
diff changeset
122 #undef HAVE_SSE2
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
123 #define ARCH_X86
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
124 #define RENAME(a) a ## _3DNow
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
125 #include "rgb2rgb_template.c"
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
126
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
127 #endif //CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
128
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
129 void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
2504
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
130 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
131 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
132 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
133 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
134 rgb24to32_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
135 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
136 rgb24to32_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
137 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
138 rgb24to32_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
139 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
140 rgb24to32_C(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
141 #else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
142 rgb24to32_C(src, dst, src_size);
2510
42e1ae2c8f5f mmx optimized 24to32
nick
parents: 2508
diff changeset
143 #endif
2504
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
144 }
2505
2aaa11d22f91 vo_vesa: more rgb2rgb support
nick
parents: 2504
diff changeset
145
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
146 void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
2505
2aaa11d22f91 vo_vesa: more rgb2rgb support
nick
parents: 2504
diff changeset
147 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
148 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
149 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
150 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
151 rgb32to24_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
152 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
153 rgb32to24_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
154 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
155 rgb32to24_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
156 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
157 rgb32to24_C(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
158 #else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
159 rgb32to24_C(src, dst, src_size);
2517
3d507ef1e3ed 32to24: MMX, MMX2, 3DNOW optimization
nick
parents: 2516
diff changeset
160 #endif
2505
2aaa11d22f91 vo_vesa: more rgb2rgb support
nick
parents: 2504
diff changeset
161 }
2506
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
162
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
163 /*
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
164 Original by Strepto/Astral
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
165 ported to gcc & bugfixed : A'rpi
2564
3d04a0991dce cosmetic
nick
parents: 2538
diff changeset
166 MMX2, 3DNOW optimization by Nick Kurshev
2698
22652c028692 faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents: 2697
diff changeset
167 32bit c version, and and&add trick by Michael Niedermayer
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
168 */
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
169 void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size)
2506
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
170 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
171 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
172 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
173 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
174 rgb15to16_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
175 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
176 rgb15to16_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
177 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
178 rgb15to16_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
179 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
180 rgb15to16_C(src, dst, src_size);
2506
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
181 #else
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
182 rgb15to16_C(src, dst, src_size);
2506
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
183 #endif
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
184 }
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
185
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
186 /**
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
187 * Palette is assumed to contain bgr32
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
188 */
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
189 void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
190 {
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
191 unsigned i;
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
192 for(i=0; i<num_pixels; i++)
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
193 ((unsigned *)dst)[i] = ((unsigned *)palette)[ src[i] ];
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
194 }
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
195
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
196 /**
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
197 * Palette is assumed to contain bgr32
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
198 */
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
199 void palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
200 {
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
201 unsigned i;
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
202 /*
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
203 writes 1 byte too much and might cause alignment issues on some architectures?
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
204 for(i=0; i<num_pixels; i++)
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
205 ((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[ src[i] ];
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
206 */
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
207 for(i=0; i<num_pixels; i++)
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
208 {
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
209 //FIXME slow?
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
210 dst[0]= palette[ src[i]*4+0 ];
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
211 dst[1]= palette[ src[i]*4+1 ];
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
212 dst[2]= palette[ src[i]*4+2 ];
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
213 dst+= 3;
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
214 }
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
215 }
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
216
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
217 void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
218 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
219 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
220 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
221 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
222 rgb32to16_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
223 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
224 rgb32to16_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
225 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
226 rgb32to16_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
227 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
228 rgb32to16_C(src, dst, src_size);
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
229 #else
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
230 rgb32to16_C(src, dst, src_size);
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
231 #endif
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
232 }
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
233
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
234 void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
235 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
236 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
237 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
238 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
239 rgb32to15_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
240 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
241 rgb32to15_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
242 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
243 rgb32to15_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
244 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
245 rgb32to15_C(src, dst, src_size);
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
246 #else
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
247 rgb32to15_C(src, dst, src_size);
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
248 #endif
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
249 }
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
250
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
251 void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
252 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
253 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
254 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
255 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
256 rgb24to16_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
257 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
258 rgb24to16_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
259 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
260 rgb24to16_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
261 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
262 rgb24to16_C(src, dst, src_size);
2740
1583214489a2 optimized rgb24to16 stuff
nick
parents: 2738
diff changeset
263 #else
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
264 rgb24to16_C(src, dst, src_size);
2740
1583214489a2 optimized rgb24to16 stuff
nick
parents: 2738
diff changeset
265 #endif
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
266 }
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
267
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
268 void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
269 {
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
270 #ifdef CAN_COMPILE_X86_ASM
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
271 // ordered by speed, fastest first
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
272 if(gCpuCaps.hasMMX2)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
273 rgb24to15_MMX2(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
274 else if(gCpuCaps.has3DNow)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
275 rgb24to15_3DNow(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
276 else if(gCpuCaps.hasMMX)
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
277 rgb24to15_MMX(src, dst, src_size);
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
278 else
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
279 rgb24to15_C(src, dst, src_size);
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
280 #else
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
281 rgb24to15_C(src, dst, src_size);
2741
b8a692c59b64 MMX2, 3DNOW, MMX optimized rgb32(24)to16(15) stuff
nick
parents: 2740
diff changeset
282 #endif
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
283 }
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
284
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Expands 8-bit palette indices into 16-bit pixels.
 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette.
 *
 * For each of the num_pixels index bytes in src, the 2-byte palette entry
 * with that index is copied to dst, so dst receives 2*num_pixels bytes.
 * Indices are uint8_t, so the palette needs an entry for every index value
 * that occurs in src (at most 256 entries of 2 bytes).
 *
 * The entry is copied byte by byte in memory order instead of through a
 * uint16_t pointer cast: this gives the same result but does not discard
 * the const qualifier of palette and does not require dst/palette to be
 * 2-byte aligned.
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
	unsigned i;
	for(i=0; i<num_pixels; i++)
	{
		const uint8_t *entry = palette + 2u*src[i];
		// read both bytes before storing so the copy behaves like one 16-bit move
		const uint8_t b0 = entry[0];
		const uint8_t b1 = entry[1];
		dst[0] = b0;
		dst[1] = b1;
		dst += 2;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
294
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Expands 8-bit palette indices into 15-bit pixels (stored in 2 bytes each).
 * Palette is assumed to contain bgr15, see rgb32to15 to convert the palette.
 *
 * For each of the num_pixels index bytes in src, the 2-byte palette entry
 * with that index is copied to dst, so dst receives 2*num_pixels bytes.
 * Indices are uint8_t, so the palette needs an entry for every index value
 * that occurs in src (at most 256 entries of 2 bytes).
 *
 * The entry is copied byte by byte in memory order instead of through a
 * uint16_t pointer cast: this gives the same result but does not discard
 * the const qualifier of palette and does not require dst/palette to be
 * 2-byte aligned.
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
	unsigned i;
	for(i=0; i<num_pixels; i++)
	{
		const uint8_t *entry = palette + 2u*src[i];
		// read both bytes before storing so the copy behaves like one 16-bit move
		const uint8_t b0 = entry[0];
		const uint8_t b1 = entry[1];
		dst[0] = b0;
		dst[1] = b1;
		dst += 2;
	}
}
2755
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
304
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
/**
 * Runtime dispatcher for rgb32tobgr32.
 * When x86 asm can be compiled, the CPU capability flags in gCpuCaps are
 * checked (MMX2, then 3DNow, then MMX; ordered by speed, fastest first) and
 * the matching variant is called; otherwise, or if no flag is set, the plain
 * C variant is used. All arguments are forwarded unchanged.
 */
void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		rgb32tobgr32_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb32tobgr32_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb32tobgr32_MMX(src, dst, src_size);
		return;
	}
#endif
	// generic fallback
	rgb32tobgr32_C(src, dst, src_size);
}
2f93f4351765 rgb32tobgr32 / bgr32torgb32
michael
parents: 2746
diff changeset
321
5582
21bd4b32abb4 rgb24->bgr24
michael
parents: 5338
diff changeset
/**
 * Runtime dispatcher for rgb24tobgr24.
 * When x86 asm can be compiled, the CPU capability flags in gCpuCaps are
 * checked (MMX2, then 3DNow, then MMX; ordered by speed, fastest first) and
 * the matching variant is called; otherwise, or if no flag is set, the plain
 * C variant is used. All arguments are forwarded unchanged.
 */
void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		rgb24tobgr24_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb24tobgr24_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb24tobgr24_MMX(src, dst, src_size);
		return;
	}
#endif
	// generic fallback
	rgb24tobgr24_C(src, dst, src_size);
}
21bd4b32abb4 rgb24->bgr24
michael
parents: 5338
diff changeset
338
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
/**
 * Runtime dispatcher for yv12toyuy2.
 *
 * height should be a multiple of 2 and width should be a multiple of 16 (if
 * this is a problem for anyone then tell me, and I'll fix it)
 *
 * When x86 asm can be compiled, the gCpuCaps flags are checked (MMX2, then
 * 3DNow, then MMX; ordered by speed, fastest first); otherwise, or if no
 * flag is set, the plain C variant is used. All arguments are forwarded
 * unchanged.
 */
void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
	unsigned int width, unsigned int height,
	unsigned int lumStride, unsigned int chromStride, unsigned int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		yv12toyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		yv12toyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
		return;
	}
#endif
	// generic fallback
	yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
}
9b47bc409083 yv12 <-> yuy2 in C
michael
parents: 2698
diff changeset
362
2724
c08b7af26782 yuy2toyv12 fixed and speedup
michael
parents: 2723
diff changeset
/**
 * Runtime dispatcher for yuv422ptoyuy2.
 *
 * width should be a multiple of 16
 *
 * When x86 asm can be compiled, the gCpuCaps flags are checked (MMX2, then
 * 3DNow, then MMX; ordered by speed, fastest first); otherwise, or if no
 * flag is set, the plain C variant is used. All arguments are forwarded
 * unchanged.
 */
void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
	unsigned int width, unsigned int height,
	unsigned int lumStride, unsigned int chromStride, unsigned int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		yuv422ptoyuy2_MMX2(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		yuv422ptoyuy2_3DNow(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
		return;
	}
#endif
	// generic fallback
	yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
}
f0fa3373f616 yuv422p -> yuy2 (untested)
michael
parents: 5582
diff changeset
385
f0fa3373f616 yuv422p -> yuy2 (untested)
michael
parents: 5582
diff changeset
/**
 * Runtime dispatcher for yuy2toyv12.
 *
 * height should be a multiple of 2 and width should be a multiple of 16 (if
 * this is a problem for anyone then tell me, and I'll fix it)
 *
 * When x86 asm can be compiled, the gCpuCaps flags are checked (MMX2, then
 * 3DNow, then MMX; ordered by speed, fastest first); otherwise, or if no
 * flag is set, the plain C variant is used. All arguments are forwarded
 * unchanged.
 */
void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
	unsigned int width, unsigned int height,
	unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		yuy2toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		yuy2toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
#endif
	// generic fallback
	yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
}
2801
318c240363c7 uyvy->uv12 added
arpi
parents: 2800
diff changeset
409
318c240363c7 uyvy->uv12 added
arpi
parents: 2800
diff changeset
/**
 * Runtime dispatcher for uyvytoyv12.
 *
 * height should be a multiple of 2 and width should be a multiple of 16 (if
 * this is a problem for anyone then tell me, and I'll fix it)
 * chrominance data is only taken from every second line, others are ignored
 * FIXME write HQ version
 *
 * When x86 asm can be compiled, the gCpuCaps flags are checked (MMX2, then
 * 3DNow, then MMX; ordered by speed, fastest first); otherwise, or if no
 * flag is set, the plain C variant is used. All arguments are forwarded
 * unchanged.
 */
void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
	unsigned int width, unsigned int height,
	unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		uyvytoyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		uyvytoyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		uyvytoyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
#endif
	// generic fallback
	uyvytoyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
}
318c240363c7 uyvy->uv12 added
arpi
parents: 2800
diff changeset
434
3132
ab67556586fa runtime cpu detection
michael
parents: 2847
diff changeset
/**
 * Runtime dispatcher for rgb24toyv12.
 *
 * height should be a multiple of 2 and width should be a multiple of 2 (if
 * this is a problem for anyone then tell me, and I'll fix it)
 * chrominance data is only taken from every second line, others are ignored
 * FIXME write HQ version
 *
 * When x86 asm can be compiled, the gCpuCaps flags are checked (MMX2, then
 * 3DNow, then MMX; ordered by speed, fastest first); otherwise, or if no
 * flag is set, the plain C variant is used. All arguments are forwarded
 * unchanged.
 */
void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
	unsigned int width, unsigned int height,
	unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		rgb24toyv12_MMX2(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb24toyv12_3DNow(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
		return;
	}
#endif
	// generic fallback
	rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
}
5337
0bd1c35aa42c byte interleaving for mga
michael
parents: 4923
diff changeset
459
0bd1c35aa42c byte interleaving for mga
michael
parents: 4923
diff changeset
/**
 * Runtime dispatcher for interleaveBytes.
 * When x86 asm can be compiled, the CPU capability flags in gCpuCaps are
 * checked (MMX2, then 3DNow, then MMX; ordered by speed, fastest first) and
 * the matching variant is called; otherwise, or if no flag is set, the plain
 * C variant is used. All arguments are forwarded unchanged.
 */
void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
	int width, int height, int src1Stride, int src2Stride, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
	if(gCpuCaps.hasMMX2)
	{
		interleaveBytes_MMX2(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		interleaveBytes_3DNow(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
		return;
	}
#endif
	// generic fallback
	interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
}