Mercurial > mplayer.hg
annotate postproc/rgb2rgb_template.c @ 11007:48b7d7aa444d
configure altivec patch by Magnus Damm <damm@opensource.se>
* CC is not checked for Altivec support (see above).
The patch adds checks for FSF-style flags and Darwin-style flags.
The check is performed regardless of the gcc version.
* Disabling of Altivec.
--disable-altivec is broken today if /proc/cpuinfo shows that your cpu
supports altivec. The patch takes care of that.
* "GCC & CPU optimization abilities" always shows that it is optimizing
for the CPU that configure is running on; it should show the optimization
that is enabled for gcc instead. This is a cosmetic change only, but the
current output is confusing IMHO.
* Runtime CPU-detection now enables altivec for powerpc.
Now with the patch it should be possible to use --enable-altivec,
--disable-altivec, --enable-runtime-cpudetection regardless of powerpc cpu type.
The configure script handles altivec support in the following order:
1. Altivec is enabled by default if your cpu supports it.
2. --enable-runtime-cpudetection will enable altivec support.
3. If you have forced altivec on/off with --enable-altivec/--disable-altivec, then
your selection will override the previous altivec configuration.
4. If altivec is enabled but the compiler doesn't support it, altivec gets turned off.
author | attila |
---|---|
date | Sat, 04 Oct 2003 23:06:04 +0000 |
parents | a32fb6812221 |
children | f33f908ae085 |
rev | line source |
---|---|
2694 | 1 /* |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
2 * |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
3 * rgb2rgb.c, Software RGB to RGB convertor |
2732 | 4 * pluralize by Software PAL8 to RGB convertor |
5 * Software YUV to YUV convertor | |
6 * Software YUV to RGB convertor | |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
7 * Written by Nick Kurshev. |
3132 | 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
9 */ |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
10 |
6492 | 11 #include <stddef.h> |
12 #include <inttypes.h> /* for __WORDSIZE */ | |
13 | |
14 #ifndef __WORDSIZE | |
7421
0684cad9b204
use detected WORDSIZE instead of warning, when inttypes.h doesn't define __WORDSIZE
arpi
parents:
6608
diff
changeset
|
15 // #warning You have misconfigured system and probably will lose performance! |
0684cad9b204
use detected WORDSIZE instead of warning, when inttypes.h doesn't define __WORDSIZE
arpi
parents:
6608
diff
changeset
|
16 #define __WORDSIZE MP_WORDSIZE |
6492 | 17 #endif |
18 | |
3132 | 19 #undef PREFETCH |
20 #undef MOVNTQ | |
21 #undef EMMS | |
22 #undef SFENCE | |
23 #undef MMREG_SIZE | |
24 #undef PREFETCHW | |
25 #undef PAVGB | |
2755 | 26 |
3132 | 27 #ifdef HAVE_SSE2 |
28 #define MMREG_SIZE 16 | |
29 #else | |
30 #define MMREG_SIZE 8 | |
2535 | 31 #endif |
2513 | 32 |
3132 | 33 #ifdef HAVE_3DNOW |
34 #define PREFETCH "prefetch" | |
35 #define PREFETCHW "prefetchw" | |
36 #define PAVGB "pavgusb" | |
37 #elif defined ( HAVE_MMX2 ) | |
38 #define PREFETCH "prefetchnta" | |
39 #define PREFETCHW "prefetcht0" | |
40 #define PAVGB "pavgb" | |
41 #else | |
42 #define PREFETCH "/nop" | |
43 #define PREFETCHW "/nop" | |
44 #endif | |
45 | |
46 #ifdef HAVE_3DNOW | |
47 /* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ | |
48 #define EMMS "femms" | |
49 #else | |
50 #define EMMS "emms" | |
51 #endif | |
52 | |
53 #ifdef HAVE_MMX2 | |
54 #define MOVNTQ "movntq" | |
55 #define SFENCE "sfence" | |
56 #else | |
57 #define MOVNTQ "movq" | |
58 #define SFENCE "/nop" | |
59 #endif | |
60 | |
/**
 * Expand packed 3-byte pixels to 4-byte pixels; the fourth output byte is 0.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output buffer; receives 4 bytes per complete input pixel
 * @param src_size number of input bytes; a trailing partial pixel
 *                 (src_size not a multiple of 3) is ignored
 */
static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
  uint8_t *dest = dst;
  const uint8_t *s = src;
  const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
  /* mask32 is defined by the including file; it is ANDed into every
     quadword below (presumably to clear each 4th byte -- confirm there). */
  __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
  /* 24 input bytes -> 32 output bytes per iteration.  The last
     "punpckldq 21%1" loads 4 bytes starting at offset 21, so it touches
     byte 24: require at least 25 readable bytes so the load stays inside
     the source buffer.  Counting remaining bytes also avoids forming a
     pointer in front of the buffer for short inputs. */
  while(end - s > 24)
  {
    __asm __volatile(
	PREFETCH" 32%1\n\t"
	"movd %1, %%mm0\n\t"
	"punpckldq 3%1, %%mm0\n\t"
	"movd 6%1, %%mm1\n\t"
	"punpckldq 9%1, %%mm1\n\t"
	"movd 12%1, %%mm2\n\t"
	"punpckldq 15%1, %%mm2\n\t"
	"movd 18%1, %%mm3\n\t"
	"punpckldq 21%1, %%mm3\n\t"
	"pand %%mm7, %%mm0\n\t"
	"pand %%mm7, %%mm1\n\t"
	"pand %%mm7, %%mm2\n\t"
	"pand %%mm7, %%mm3\n\t"
	MOVNTQ" %%mm0, %0\n\t"
	MOVNTQ" %%mm1, 8%0\n\t"
	MOVNTQ" %%mm2, 16%0\n\t"
	MOVNTQ" %%mm3, 24%0"
	:"=m"(*dest)
	:"m"(*s)
	:"memory");
    dest += 32;
    s += 24;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#endif
  /* Scalar tail: only whole pixels, so a truncated final pixel can neither
     read past the source nor write a bogus pixel to the destination. */
  while(end - s >= 3)
  {
    *dest++ = *s++;
    *dest++ = *s++;
    *dest++ = *s++;
    *dest++ = 0;
  }
}
2505 | 111 |
/**
 * Pack 4-byte pixels into 3-byte pixels by dropping every fourth input byte.
 *
 * @param src      input pixels, 4 bytes each
 * @param dst      output buffer; receives 3 bytes per complete input pixel
 * @param src_size number of input bytes; a trailing partial pixel
 *                 (src_size not a multiple of 4) is ignored
 */
static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
  uint8_t *dest = dst;
  const uint8_t *s = src;
  const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
  /* 32 input bytes -> 24 output bytes per iteration, using the mask24*
     constants supplied by the including file.  The bound is expressed as a
     remaining-byte count so no pointer before the buffer is ever formed. */
  while(end - s >= 32)
  {
    __asm __volatile(
	PREFETCH" 32%1\n\t"
	"movq %1, %%mm0\n\t"
	"movq 8%1, %%mm1\n\t"
	"movq 16%1, %%mm4\n\t"
	"movq 24%1, %%mm5\n\t"
	"movq %%mm0, %%mm2\n\t"
	"movq %%mm1, %%mm3\n\t"
	"movq %%mm4, %%mm6\n\t"
	"movq %%mm5, %%mm7\n\t"
	"psrlq $8, %%mm2\n\t"
	"psrlq $8, %%mm3\n\t"
	"psrlq $8, %%mm6\n\t"
	"psrlq $8, %%mm7\n\t"
	"pand %2, %%mm0\n\t"
	"pand %2, %%mm1\n\t"
	"pand %2, %%mm4\n\t"
	"pand %2, %%mm5\n\t"
	"pand %3, %%mm2\n\t"
	"pand %3, %%mm3\n\t"
	"pand %3, %%mm6\n\t"
	"pand %3, %%mm7\n\t"
	"por %%mm2, %%mm0\n\t"
	"por %%mm3, %%mm1\n\t"
	"por %%mm6, %%mm4\n\t"
	"por %%mm7, %%mm5\n\t"

	"movq %%mm1, %%mm2\n\t"
	"movq %%mm4, %%mm3\n\t"
	"psllq $48, %%mm2\n\t"
	"psllq $32, %%mm3\n\t"
	"pand %4, %%mm2\n\t"
	"pand %5, %%mm3\n\t"
	"por %%mm2, %%mm0\n\t"
	"psrlq $16, %%mm1\n\t"
	"psrlq $32, %%mm4\n\t"
	"psllq $16, %%mm5\n\t"
	"por %%mm3, %%mm1\n\t"
	"pand %6, %%mm5\n\t"
	"por %%mm5, %%mm4\n\t"

	MOVNTQ" %%mm0, %0\n\t"
	MOVNTQ" %%mm1, 8%0\n\t"
	MOVNTQ" %%mm4, 16%0"
	:"=m"(*dest)
	:"m"(*s),"m"(mask24l),
	 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
	:"memory");
    dest += 24;
    s += 32;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#endif
  /* Scalar tail: copy three bytes and skip the fourth, whole pixels only,
     so a truncated final pixel is never read past the end of src. */
  while(end - s >= 4)
  {
    *dest++ = *s++;
    *dest++ = *s++;
    *dest++ = *s++;
    s++;
  }
}
2506 | 188 |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
189 /* |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
190 Original by Strepto/Astral |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
191 ported to gcc & bugfixed : A'rpi |
2564 | 192 MMX2, 3DNOW optimization by Nick Kurshev |
2698
22652c028692
faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents:
2697
diff
changeset
|
193 32bit c version, and and&add trick by Michael Niedermayer |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
194 */ |
/**
 * Convert 16-bit words from the 15-bit layout to the 16-bit layout.
 *
 * For each 16-bit word x the result is (x & 0x7FFF) + (x & 0x7FE0): the low
 * five bits stay in place and bits 5..14 are doubled, i.e. moved up by one
 * bit, leaving bit 5 of the result zero.
 *
 * @param src      input words (native byte order, 2 bytes per pixel)
 * @param dst      output buffer, same size as the input
 * @param src_size number of input bytes; a single trailing odd byte is ignored
 */
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
  register const uint8_t* s=src;
  register uint8_t* d=dst;
  register const uint8_t *end;
  end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH" %0"::"m"(*s));
  __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
  /* 16 bytes (8 pixels) per iteration. */
  while(end - s >= 16)
  {
	__asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq %1, %%mm0\n\t"
		"movq 8%1, %%mm2\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm2, %%mm3\n\t"
		"pand %%mm4, %%mm0\n\t"
		"pand %%mm4, %%mm2\n\t"
		"paddw %%mm1, %%mm0\n\t"
		"paddw %%mm3, %%mm2\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		MOVNTQ" %%mm2, 8%0"
		:"=m"(*d)
		:"m"(*s)
		/* The asm reads and writes 16 bytes, far more than the single
		   bytes named by the operands, so tell the compiler. */
		:"memory");
	d+=16;
	s+=16;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels at a time; the per-halfword sum never exceeds 0xFFDF, so no
       carry crosses from the low pixel into the high one. */
    while(end - s >= 4)
    {
	register unsigned x= *((const uint32_t *)s);
	*((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
	d+=4;
	s+=4;
    }
    /* At most one whole pixel is left; a lone odd byte is not a pixel and
       must not trigger a 2-byte read/write. */
    if(end - s >= 2)
    {
	register unsigned short x= *((const uint16_t *)s);
	*((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}
2694 | 243 |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
/**
 * Convert 16-bit words from the 16-bit layout to the 15-bit layout.
 *
 * For each 16-bit word x the result is ((x >> 1) & 0x7FE0) | (x & 0x001F):
 * the low five bits stay in place, bits 6..15 move down by one bit and
 * bit 5 of the input is dropped.
 *
 * @param src      input words (native byte order, 2 bytes per pixel)
 * @param dst      output buffer, same size as the input
 * @param src_size number of input bytes; a single trailing odd byte is ignored
 */
static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
  register const uint8_t* s=src;
  register uint8_t* d=dst;
  register const uint8_t *end;
  end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH" %0"::"m"(*s));
  __asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
  __asm __volatile("movq %0, %%mm6"::"m"(mask15b));
  /* 16 bytes (8 pixels) per iteration. */
  while(end - s >= 16)
  {
	__asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq %1, %%mm0\n\t"
		"movq 8%1, %%mm2\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm2, %%mm3\n\t"
		"psrlq $1, %%mm0\n\t"
		"psrlq $1, %%mm2\n\t"
		"pand %%mm7, %%mm0\n\t"
		"pand %%mm7, %%mm2\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm3\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm3, %%mm2\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		MOVNTQ" %%mm2, 8%0"
		:"=m"(*d)
		:"m"(*s)
		/* The asm reads and writes 16 bytes, far more than the single
		   bytes named by the operands, so tell the compiler. */
		:"memory");
	d+=16;
	s+=16;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#endif
    /* Two pixels at a time; the 0x7FE07FE0 mask discards the bit that the
       shift carries from the high pixel into the low one. */
    while(end - s >= 4)
    {
	register uint32_t x= *((const uint32_t *)s);
	*((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
	s+=4;
	d+=4;
    }
    /* At most one whole pixel is left; a lone odd byte is not a pixel and
       must not trigger a 2-byte read/write. */
    if(end - s >= 2)
    {
	register uint16_t x= *((const uint16_t *)s);
	*((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
	s+=2;
	d+=2;
    }
}
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
299 |
/**
 * Convert 32-bit pixels to 16-bit pixels.
 *
 * Each input pixel is read as a native-endian 32-bit word x; the output word
 * is ((x & 0xFF) >> 3) + ((x & 0xFC00) >> 5) + ((x & 0xF80000) >> 8), i.e.
 * input bits 3..7 land in bits 0..4, bits 10..15 in bits 5..10 and
 * bits 19..23 in bits 11..15.  The top input byte is ignored.
 *
 * @param src      input pixels, 4 bytes each
 * @param dst      output buffer; receives 2 bytes per complete input pixel
 * @param src_size number of input bytes; a trailing partial pixel is ignored
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster)
	/* The asm loop tests its bound only at the bottom, so it always runs at
	   least once: enter it only when a full 16-byte group exists. */
	if(src_size >= 16)
	{
		const uint8_t *mm_end = end - 15;
		__asm __volatile(
			"movq %3, %%mm5 \n\t"
			"movq %4, %%mm6 \n\t"
			"movq %5, %%mm7 \n\t"
			".balign 16 \n\t"
			"1: \n\t"
			PREFETCH" 32(%1) \n\t"
			"movd (%1), %%mm0 \n\t"
			"movd 4(%1), %%mm3 \n\t"
			"punpckldq 8(%1), %%mm0 \n\t"
			"punpckldq 12(%1), %%mm3 \n\t"
			"movq %%mm0, %%mm1 \n\t"
			"movq %%mm3, %%mm4 \n\t"
			"pand %%mm6, %%mm0 \n\t"
			"pand %%mm6, %%mm3 \n\t"
			"pmaddwd %%mm7, %%mm0 \n\t"
			"pmaddwd %%mm7, %%mm3 \n\t"
			"pand %%mm5, %%mm1 \n\t"
			"pand %%mm5, %%mm4 \n\t"
			"por %%mm1, %%mm0 \n\t"
			"por %%mm4, %%mm3 \n\t"
			"psrld $5, %%mm0 \n\t"
			"pslld $11, %%mm3 \n\t"
			"por %%mm3, %%mm0 \n\t"
			MOVNTQ" %%mm0, (%0) \n\t"
			/* No size suffix: the assembler takes the width from the
			   pointer registers, so this is not tied to 32-bit mode. */
			"add $16, %1 \n\t"
			"add $8, %0 \n\t"
			"cmp %2, %1 \n\t"
			" jb 1b \n\t"
			: "+r" (d), "+r"(s)
			: "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
			/* Memory is accessed through the pointer registers only. */
			: "memory"
		);
	}
#else
	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_16mask),"m"(green_16mask));
	while(end - s >= 16)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 4%1, %%mm3\n\t"
		"punpckldq 8%1, %%mm0\n\t"
		"punpckldq 12%1, %%mm3\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm3, %%mm4\n\t"
		"movq %%mm3, %%mm5\n\t"
		"psrlq $3, %%mm0\n\t"
		"psrlq $3, %%mm3\n\t"
		"pand %2, %%mm0\n\t"
		"pand %2, %%mm3\n\t"
		"psrlq $5, %%mm1\n\t"
		"psrlq $5, %%mm4\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm4\n\t"
		"psrlq $8, %%mm2\n\t"
		"psrlq $8, %%mm5\n\t"
		"pand %%mm7, %%mm2\n\t"
		"pand %%mm7, %%mm5\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm4, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm5, %%mm3\n\t"
		"psllq $16, %%mm3\n\t"
		"por %%mm3, %%mm0\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
		d += 4;
		s += 16;
	}
#endif
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Scalar tail, whole pixels only.  The load and the pointer bump are
	   separate statements: incrementing the result of a cast is not valid C. */
	while(end - s >= 4)
	{
		const uint32_t rgb = *(const uint32_t *)s;
		s += 4;
		*d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
	}
}
396 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
397 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
398 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
399 const uint8_t *s = src; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
400 const uint8_t *end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
401 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
402 const uint8_t *mm_end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
403 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
404 uint16_t *d = (uint16_t *)dst; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
405 end = s + src_size; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
406 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
407 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
408 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
409 "movq %0, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
410 "movq %1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
411 ::"m"(red_16mask),"m"(green_16mask)); |
6608
da27a1bc1763
fixing memory overwrite bugs in the new converters
michael
parents:
6606
diff
changeset
|
412 mm_end = end - 15; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
413 while(s < mm_end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
414 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
415 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
416 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
417 "movd %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
418 "movd 4%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
419 "punpckldq 8%1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
420 "punpckldq 12%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
421 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
422 "movq %%mm0, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
423 "movq %%mm3, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
424 "movq %%mm3, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
425 "psllq $8, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
426 "psllq $8, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
427 "pand %%mm7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
428 "pand %%mm7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
429 "psrlq $5, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
430 "psrlq $5, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
431 "pand %%mm6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
432 "pand %%mm6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
433 "psrlq $19, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
434 "psrlq $19, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
435 "pand %2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
436 "pand %2, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
437 "por %%mm1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
438 "por %%mm4, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
439 "por %%mm2, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
440 "por %%mm5, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
441 "psllq $16, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
442 "por %%mm3, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
443 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
444 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
445 d += 4; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
446 s += 16; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
447 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
448 __asm __volatile(SFENCE:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
449 __asm __volatile(EMMS:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
450 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
451 while(s < end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
452 { |
9430 | 453 const int src= *((uint32_t*)s)++; |
454 *d++ = ((src&0xF8)<<8) + ((src&0xFC00)>>5) + ((src&0xF80000)>>19); | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
455 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
456 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
457 |
/*
 * Convert packed 32-bit pixels to packed 15-bit (5-5-5) pixels.
 *
 * Every source pixel is read as one native-endian 32-bit word.  Bits 3-7,
 * 11-15 and 19-23 of that word are moved to bits 0-4, 5-9 and 10-14 of the
 * 16-bit result (the fields selected by blue_15mask, green_15mask and
 * red_15mask in the alternative MMX path).  Bits 24-31 of the source word
 * are ignored and bit 15 of the result is always 0.
 *
 * src      - source pixels, src_size bytes; the loops advance 4 bytes at a
 *            time, so src_size is expected to be a multiple of 4
 * dst      - destination, receives src_size/2 bytes
 * src_size - number of source bytes
 */
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	/* one MMX iteration consumes 16 source bytes: run only while s + 16 <= end */
	mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster)
	asm volatile(
		"movq %3, %%mm5 \n\t"
		"movq %4, %%mm6 \n\t"
		"movq %5, %%mm7 \n\t"
		/* test the loop condition before the first iteration; falling
		   straight into the body would convert 4 pixels even when
		   fewer than 16 source bytes are available */
		"jmp 2f \n\t"
		".balign 16 \n\t"
		"1: \n\t"
		PREFETCH" 32(%1) \n\t"
		"movd (%1), %%mm0 \n\t"
		"movd 4(%1), %%mm3 \n\t"
		"punpckldq 8(%1), %%mm0 \n\t"
		"punpckldq 12(%1), %%mm3 \n\t"
		"movq %%mm0, %%mm1 \n\t"
		"movq %%mm3, %%mm4 \n\t"
		"pand %%mm6, %%mm0 \n\t"
		"pand %%mm6, %%mm3 \n\t"
		"pmaddwd %%mm7, %%mm0 \n\t"
		"pmaddwd %%mm7, %%mm3 \n\t"
		"pand %%mm5, %%mm1 \n\t"
		"pand %%mm5, %%mm4 \n\t"
		"por %%mm1, %%mm0 \n\t"
		"por %%mm4, %%mm3 \n\t"
		"psrld $6, %%mm0 \n\t"
		"pslld $10, %%mm3 \n\t"
		"por %%mm3, %%mm0 \n\t"
		MOVNTQ" %%mm0, (%0) \n\t"
		"addl $16, %1 \n\t"
		"addl $8, %0 \n\t"
		"2: \n\t"
		"cmpl %2, %1 \n\t"
		" jb 1b \n\t"
		: "+r" (d), "+r"(s)
		: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
		/* the loop loads from *s and stores to *d through registers only */
		: "memory"
	);
#else
	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 4%1, %%mm3\n\t"
		"punpckldq 8%1, %%mm0\n\t"
		"punpckldq 12%1, %%mm3\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm3, %%mm4\n\t"
		"movq %%mm3, %%mm5\n\t"
		"psrlq $3, %%mm0\n\t"
		"psrlq $3, %%mm3\n\t"
		"pand %2, %%mm0\n\t"
		"pand %2, %%mm3\n\t"
		"psrlq $6, %%mm1\n\t"
		"psrlq $6, %%mm4\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm4\n\t"
		"psrlq $9, %%mm2\n\t"
		"psrlq $9, %%mm5\n\t"
		"pand %%mm7, %%mm2\n\t"
		"pand %%mm7, %%mm5\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm4, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm5, %%mm3\n\t"
		"psllq $16, %%mm3\n\t"
		"por %%mm3, %%mm0\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 16;
	}
#endif
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path: whole buffer without MMX, otherwise the leftover pixels */
	while(s < end)
	{
		/* plain load + pointer bump; "*((uint32_t*)s)++" (a cast used as
		   an lvalue) is a GCC extension that newer compilers reject */
		const uint32_t pix = *(const uint32_t *)s;
		s += 4;
		*d++ = ((pix&0xFF)>>3) + ((pix&0xF800)>>6) + ((pix&0xF80000)>>9);
	}
}
553 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
/*
 * Convert packed 32-bit pixels to packed 15-bit (5-5-5) pixels with the
 * outer two colour fields swapped relative to rgb32to15.
 *
 * Every source pixel is read as one native-endian 32-bit word.  Bits 3-7 of
 * the word go to bits 10-14 of the result, bits 11-15 go to bits 5-9 and
 * bits 19-23 go to bits 0-4.  Bits 24-31 of the source word are ignored and
 * bit 15 of the result is always 0.
 *
 * src      - source pixels, src_size bytes; the loops advance 4 bytes at a
 *            time, so src_size is expected to be a multiple of 4
 * dst      - destination, receives src_size/2 bytes
 * src_size - number of source bytes
 */
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	/* mm7/mm6 keep the two register-resident masks for the whole loop */
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	/* one iteration consumes 16 source bytes: run only while s + 16 <= end */
	mm_end = end - 15;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 4%1, %%mm3\n\t"
		"punpckldq 8%1, %%mm0\n\t"
		"punpckldq 12%1, %%mm3\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm3, %%mm4\n\t"
		"movq %%mm3, %%mm5\n\t"
		"psllq $7, %%mm0\n\t"
		"psllq $7, %%mm3\n\t"
		"pand %%mm7, %%mm0\n\t"
		"pand %%mm7, %%mm3\n\t"
		"psrlq $6, %%mm1\n\t"
		"psrlq $6, %%mm4\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm4\n\t"
		"psrlq $19, %%mm2\n\t"
		"psrlq $19, %%mm5\n\t"
		"pand %2, %%mm2\n\t"
		"pand %2, %%mm5\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm4, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm5, %%mm3\n\t"
		"psllq $16, %%mm3\n\t"
		"por %%mm3, %%mm0\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 16;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path: whole buffer without MMX, otherwise the leftover pixels */
	while(s < end)
	{
		/* plain load + pointer bump; "*((uint32_t*)s)++" (a cast used as
		   an lvalue) is a GCC extension that newer compilers reject */
		const uint32_t pix = *(const uint32_t *)s;
		s += 4;
		*d++ = ((pix&0xF8)<<7) + ((pix&0xF800)>>6) + ((pix&0xF80000)>>19);
	}
}
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
614 |
/*
 * Convert packed 24-bit pixels to packed 16-bit (5-6-5) pixels.
 *
 * For each 3-byte source pixel the top 5 bits of byte 0 go to bits 0-4 of
 * the result, the top 6 bits of byte 1 go to bits 5-10 and the top 5 bits of
 * byte 2 go to bits 11-15 (the scalar code names the bytes b, g, r).
 *
 * src      - source pixels, src_size bytes; the loops advance 3 bytes at a
 *            time, so src_size is expected to be a multiple of 3
 * dst      - destination, receives 2 bytes per source pixel
 * src_size - number of source bytes
 */
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	/* mm7/mm6 keep the two register-resident masks for the whole loop */
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_16mask),"m"(green_16mask));
	/* An iteration converts 12 bytes, but its last load ("punpckldq 9%1")
	   fetches 4 bytes starting at offset 9 and therefore touches byte 12.
	   Require 13 readable bytes (s + 13 <= end); "end - 11" allowed an
	   iteration with exactly 12 bytes left and read one byte past the
	   buffer.  The final pixels are then handled by the scalar loop. */
	mm_end = end - 12;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm3, %%mm4\n\t"
		"movq %%mm3, %%mm5\n\t"
		"psrlq $3, %%mm0\n\t"
		"psrlq $3, %%mm3\n\t"
		"pand %2, %%mm0\n\t"
		"pand %2, %%mm3\n\t"
		"psrlq $5, %%mm1\n\t"
		"psrlq $5, %%mm4\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm4\n\t"
		"psrlq $8, %%mm2\n\t"
		"psrlq $8, %%mm5\n\t"
		"pand %%mm7, %%mm2\n\t"
		"pand %%mm7, %%mm5\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm4, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm5, %%mm3\n\t"
		"psllq $16, %%mm3\n\t"
		"por %%mm3, %%mm0\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
		d += 4;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path: whole buffer without MMX, otherwise the leftover pixels */
	while(s < end)
	{
		const int b= *s++;
		const int g= *s++;
		const int r= *s++;
		*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
	}
}
677 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
/*
 * Convert packed 24-bit pixels to packed 16-bit (5-6-5) pixels, placing the
 * first source byte in the most significant field.
 *
 * For each 3-byte source pixel the top 5 bits of byte 0 go to bits 11-15 of
 * the result, the top 6 bits of byte 1 go to bits 5-10 and the top 5 bits of
 * byte 2 go to bits 0-4 (the scalar code names the bytes r, g, b).
 *
 * src      - source pixels, src_size bytes; consumed 3 bytes at a time
 * dst      - destination, receives 2 bytes per source pixel
 * src_size - number of source bytes
 */
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{
	const uint8_t *in = src;
	const uint8_t *const in_end = src + src_size;
	uint16_t *out = (uint16_t *)dst;
#ifdef HAVE_MMX
	/* the MMX loop only runs while at least 16 source bytes remain */
	const uint8_t *const simd_end = in_end - 15;

	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	/* mm7/mm6 keep the two register-resident masks for the whole loop */
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_16mask),"m"(green_16mask));
	/* 4 pixels per pass: 12 bytes in, 4 uint16_t out */
	for (; in < simd_end; in += 12, out += 4)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm3, %%mm4\n\t"
		"movq %%mm3, %%mm5\n\t"
		"psllq $8, %%mm0\n\t"
		"psllq $8, %%mm3\n\t"
		"pand %%mm7, %%mm0\n\t"
		"pand %%mm7, %%mm3\n\t"
		"psrlq $5, %%mm1\n\t"
		"psrlq $5, %%mm4\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm4\n\t"
		"psrlq $19, %%mm2\n\t"
		"psrlq $19, %%mm5\n\t"
		"pand %2, %%mm2\n\t"
		"pand %2, %%mm5\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm4, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm5, %%mm3\n\t"
		"psllq $16, %%mm3\n\t"
		"por %%mm3, %%mm0\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		:"=m"(*out):"m"(*in),"m"(blue_16mask):"memory");
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path: whole buffer without MMX, otherwise the leftover pixels */
	for (; in < in_end; in += 3)
	{
		const int r = in[0];
		const int g = in[1];
		const int b = in[2];
		const int hi  = (r & 0xF8) << 8;
		const int mid = (g & 0xFC) << 3;
		const int lo  = b >> 3;
		*out++ = lo | mid | hi;
	}
}
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
740 |
/*
 * Convert packed 24-bit pixels to packed 15-bit (5-5-5) pixels.
 *
 * For each 3-byte source pixel the top 5 bits of byte 0 go to bits 0-4 of
 * the result, the top 5 bits of byte 1 go to bits 5-9 and the top 5 bits of
 * byte 2 go to bits 10-14 (the scalar code names the bytes b, g, r).
 * Bit 15 of the result is always 0.
 *
 * src      - source pixels, src_size bytes; the loops advance 3 bytes at a
 *            time, so src_size is expected to be a multiple of 3
 * dst      - destination, receives 2 bytes per source pixel
 * src_size - number of source bytes
 */
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	/* mm7/mm6 keep the two register-resident masks for the whole loop */
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	/* An iteration converts 12 bytes, but its last load ("punpckldq 9%1")
	   fetches 4 bytes starting at offset 9 and therefore touches byte 12.
	   Require 13 readable bytes (s + 13 <= end); "end - 11" allowed an
	   iteration with exactly 12 bytes left and read one byte past the
	   buffer.  The final pixels are then handled by the scalar loop. */
	mm_end = end - 12;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd %1, %%mm0\n\t"
		"movd 3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm2\n\t"
		"movq %%mm3, %%mm4\n\t"
		"movq %%mm3, %%mm5\n\t"
		"psrlq $3, %%mm0\n\t"
		"psrlq $3, %%mm3\n\t"
		"pand %2, %%mm0\n\t"
		"pand %2, %%mm3\n\t"
		"psrlq $6, %%mm1\n\t"
		"psrlq $6, %%mm4\n\t"
		"pand %%mm6, %%mm1\n\t"
		"pand %%mm6, %%mm4\n\t"
		"psrlq $9, %%mm2\n\t"
		"psrlq $9, %%mm5\n\t"
		"pand %%mm7, %%mm2\n\t"
		"pand %%mm7, %%mm5\n\t"
		"por %%mm1, %%mm0\n\t"
		"por %%mm4, %%mm3\n\t"
		"por %%mm2, %%mm0\n\t"
		"por %%mm5, %%mm3\n\t"
		"psllq $16, %%mm3\n\t"
		"por %%mm3, %%mm0\n\t"
		MOVNTQ" %%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path: whole buffer without MMX, otherwise the leftover pixels */
	while(s < end)
	{
		const int b= *s++;
		const int g= *s++;
		const int r= *s++;
		*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
	}
}
803 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
804 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
805 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
806 const uint8_t *s = src; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
807 const uint8_t *end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
808 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
809 const uint8_t *mm_end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
810 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
811 uint16_t *d = (uint16_t *)dst; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
812 end = s + src_size; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
813 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
814 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
815 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
816 "movq %0, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
817 "movq %1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
818 ::"m"(red_15mask),"m"(green_15mask)); |
6608
da27a1bc1763
fixing memory overwrite bugs in the new converters
michael
parents:
6606
diff
changeset
|
819 mm_end = end - 15; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
820 while(s < mm_end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
821 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
822 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
823 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
824 "movd %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
825 "movd 3%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
826 "punpckldq 6%1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
827 "punpckldq 9%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
828 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
829 "movq %%mm0, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
830 "movq %%mm3, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
831 "movq %%mm3, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
832 "psllq $7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
833 "psllq $7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
834 "pand %%mm7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
835 "pand %%mm7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
836 "psrlq $6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
837 "psrlq $6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
838 "pand %%mm6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
839 "pand %%mm6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
840 "psrlq $19, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
841 "psrlq $19, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
842 "pand %2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
843 "pand %2, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
844 "por %%mm1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
845 "por %%mm4, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
846 "por %%mm2, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
847 "por %%mm5, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
848 "psllq $16, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
849 "por %%mm3, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
850 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
851 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
852 d += 4; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
853 s += 12; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
854 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
855 __asm __volatile(SFENCE:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
856 __asm __volatile(EMMS:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
857 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
858 while(s < end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
859 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
860 const int r= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
861 const int g= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
862 const int b= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
863 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
864 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
865 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
866 |
/*
 * The converters below use a less accurate approximation: the input value
 * is simply left-shifted and the low-order bits are filled with zeroes.
 * This method improves PNG compression, but the scheme cannot reproduce
 * white exactly, since it does not generate an all-ones maximum value; the
 * net effect is to darken the image slightly.
 *
 * The better method would be "left bit replication":
 *
 *    4 3 2 1 0
 *    ---------
 *    1 1 0 1 1
 *
 *    7 6 5 4 3  2 1 0
 *    ----------------
 *    1 1 0 1 1  1 1 0
 *    |=======|  |===|
 *        |      Leftmost Bits Repeated to Fill Open Bits
 *        |
 *    Original Bits
 */
890 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
891 { | |
892 const uint16_t *end; | |
893 #ifdef HAVE_MMX | |
894 const uint16_t *mm_end; | |
895 #endif | |
896 uint8_t *d = (uint8_t *)dst; | |
897 const uint16_t *s = (uint16_t *)src; | |
898 end = s + src_size/2; | |
899 #ifdef HAVE_MMX | |
900 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
6605 | 901 mm_end = end - 7; |
6492 | 902 while(s < mm_end) |
903 { | |
904 __asm __volatile( | |
905 PREFETCH" 32%1\n\t" | |
906 "movq %1, %%mm0\n\t" | |
907 "movq %1, %%mm1\n\t" | |
908 "movq %1, %%mm2\n\t" | |
909 "pand %2, %%mm0\n\t" | |
910 "pand %3, %%mm1\n\t" | |
911 "pand %4, %%mm2\n\t" | |
912 "psllq $3, %%mm0\n\t" | |
913 "psrlq $2, %%mm1\n\t" | |
914 "psrlq $7, %%mm2\n\t" | |
915 "movq %%mm0, %%mm3\n\t" | |
916 "movq %%mm1, %%mm4\n\t" | |
917 "movq %%mm2, %%mm5\n\t" | |
918 "punpcklwd %5, %%mm0\n\t" | |
919 "punpcklwd %5, %%mm1\n\t" | |
920 "punpcklwd %5, %%mm2\n\t" | |
921 "punpckhwd %5, %%mm3\n\t" | |
922 "punpckhwd %5, %%mm4\n\t" | |
923 "punpckhwd %5, %%mm5\n\t" | |
924 "psllq $8, %%mm1\n\t" | |
925 "psllq $16, %%mm2\n\t" | |
926 "por %%mm1, %%mm0\n\t" | |
927 "por %%mm2, %%mm0\n\t" | |
928 "psllq $8, %%mm4\n\t" | |
929 "psllq $16, %%mm5\n\t" | |
930 "por %%mm4, %%mm3\n\t" | |
931 "por %%mm5, %%mm3\n\t" | |
932 | |
933 "movq %%mm0, %%mm6\n\t" | |
934 "movq %%mm3, %%mm7\n\t" | |
935 | |
936 "movq 8%1, %%mm0\n\t" | |
937 "movq 8%1, %%mm1\n\t" | |
938 "movq 8%1, %%mm2\n\t" | |
939 "pand %2, %%mm0\n\t" | |
940 "pand %3, %%mm1\n\t" | |
941 "pand %4, %%mm2\n\t" | |
942 "psllq $3, %%mm0\n\t" | |
943 "psrlq $2, %%mm1\n\t" | |
944 "psrlq $7, %%mm2\n\t" | |
945 "movq %%mm0, %%mm3\n\t" | |
946 "movq %%mm1, %%mm4\n\t" | |
947 "movq %%mm2, %%mm5\n\t" | |
948 "punpcklwd %5, %%mm0\n\t" | |
949 "punpcklwd %5, %%mm1\n\t" | |
950 "punpcklwd %5, %%mm2\n\t" | |
951 "punpckhwd %5, %%mm3\n\t" | |
952 "punpckhwd %5, %%mm4\n\t" | |
953 "punpckhwd %5, %%mm5\n\t" | |
954 "psllq $8, %%mm1\n\t" | |
955 "psllq $16, %%mm2\n\t" | |
956 "por %%mm1, %%mm0\n\t" | |
957 "por %%mm2, %%mm0\n\t" | |
958 "psllq $8, %%mm4\n\t" | |
959 "psllq $16, %%mm5\n\t" | |
960 "por %%mm4, %%mm3\n\t" | |
961 "por %%mm5, %%mm3\n\t" | |
962 | |
963 :"=m"(*d) | |
964 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |
965 :"memory"); | |
966 /* Borrowed 32 to 24 */ | |
967 __asm __volatile( | |
968 "movq %%mm0, %%mm4\n\t" | |
969 "movq %%mm3, %%mm5\n\t" | |
970 "movq %%mm6, %%mm0\n\t" | |
971 "movq %%mm7, %%mm1\n\t" | |
972 | |
973 "movq %%mm4, %%mm6\n\t" | |
974 "movq %%mm5, %%mm7\n\t" | |
975 "movq %%mm0, %%mm2\n\t" | |
976 "movq %%mm1, %%mm3\n\t" | |
977 | |
978 "psrlq $8, %%mm2\n\t" | |
979 "psrlq $8, %%mm3\n\t" | |
980 "psrlq $8, %%mm6\n\t" | |
981 "psrlq $8, %%mm7\n\t" | |
982 "pand %2, %%mm0\n\t" | |
983 "pand %2, %%mm1\n\t" | |
984 "pand %2, %%mm4\n\t" | |
985 "pand %2, %%mm5\n\t" | |
986 "pand %3, %%mm2\n\t" | |
987 "pand %3, %%mm3\n\t" | |
988 "pand %3, %%mm6\n\t" | |
989 "pand %3, %%mm7\n\t" | |
990 "por %%mm2, %%mm0\n\t" | |
991 "por %%mm3, %%mm1\n\t" | |
992 "por %%mm6, %%mm4\n\t" | |
993 "por %%mm7, %%mm5\n\t" | |
994 | |
995 "movq %%mm1, %%mm2\n\t" | |
996 "movq %%mm4, %%mm3\n\t" | |
997 "psllq $48, %%mm2\n\t" | |
998 "psllq $32, %%mm3\n\t" | |
999 "pand %4, %%mm2\n\t" | |
1000 "pand %5, %%mm3\n\t" | |
1001 "por %%mm2, %%mm0\n\t" | |
1002 "psrlq $16, %%mm1\n\t" | |
1003 "psrlq $32, %%mm4\n\t" | |
1004 "psllq $16, %%mm5\n\t" | |
1005 "por %%mm3, %%mm1\n\t" | |
1006 "pand %6, %%mm5\n\t" | |
1007 "por %%mm5, %%mm4\n\t" | |
1008 | |
1009 MOVNTQ" %%mm0, %0\n\t" | |
1010 MOVNTQ" %%mm1, 8%0\n\t" | |
1011 MOVNTQ" %%mm4, 16%0" | |
1012 | |
1013 :"=m"(*d) | |
1014 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1015 :"memory"); | |
1016 d += 24; | |
1017 s += 8; | |
1018 } | |
2741 | 1019 __asm __volatile(SFENCE:::"memory"); |
1020 __asm __volatile(EMMS:::"memory"); | |
6492 | 1021 #endif |
1022 while(s < end) | |
1023 { | |
1024 register uint16_t bgr; | |
1025 bgr = *s++; | |
1026 *d++ = (bgr&0x1F)<<3; | |
1027 *d++ = (bgr&0x3E0)>>2; | |
1028 *d++ = (bgr&0x7C00)>>7; | |
1029 } | |
1030 } | |
1031 | |
1032 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
1033 { | |
1034 const uint16_t *end; | |
1035 #ifdef HAVE_MMX | |
1036 const uint16_t *mm_end; | |
1037 #endif | |
1038 uint8_t *d = (uint8_t *)dst; | |
1039 const uint16_t *s = (const uint16_t *)src; | |
1040 end = s + src_size/2; | |
1041 #ifdef HAVE_MMX | |
1042 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
6605 | 1043 mm_end = end - 7; |
6492 | 1044 while(s < mm_end) |
2718 | 1045 { |
6492 | 1046 __asm __volatile( |
1047 PREFETCH" 32%1\n\t" | |
1048 "movq %1, %%mm0\n\t" | |
1049 "movq %1, %%mm1\n\t" | |
1050 "movq %1, %%mm2\n\t" | |
1051 "pand %2, %%mm0\n\t" | |
1052 "pand %3, %%mm1\n\t" | |
1053 "pand %4, %%mm2\n\t" | |
1054 "psllq $3, %%mm0\n\t" | |
1055 "psrlq $3, %%mm1\n\t" | |
1056 "psrlq $8, %%mm2\n\t" | |
1057 "movq %%mm0, %%mm3\n\t" | |
1058 "movq %%mm1, %%mm4\n\t" | |
1059 "movq %%mm2, %%mm5\n\t" | |
1060 "punpcklwd %5, %%mm0\n\t" | |
1061 "punpcklwd %5, %%mm1\n\t" | |
1062 "punpcklwd %5, %%mm2\n\t" | |
1063 "punpckhwd %5, %%mm3\n\t" | |
1064 "punpckhwd %5, %%mm4\n\t" | |
1065 "punpckhwd %5, %%mm5\n\t" | |
1066 "psllq $8, %%mm1\n\t" | |
1067 "psllq $16, %%mm2\n\t" | |
1068 "por %%mm1, %%mm0\n\t" | |
1069 "por %%mm2, %%mm0\n\t" | |
1070 "psllq $8, %%mm4\n\t" | |
1071 "psllq $16, %%mm5\n\t" | |
1072 "por %%mm4, %%mm3\n\t" | |
1073 "por %%mm5, %%mm3\n\t" | |
1074 | |
1075 "movq %%mm0, %%mm6\n\t" | |
1076 "movq %%mm3, %%mm7\n\t" | |
1077 | |
1078 "movq 8%1, %%mm0\n\t" | |
1079 "movq 8%1, %%mm1\n\t" | |
1080 "movq 8%1, %%mm2\n\t" | |
1081 "pand %2, %%mm0\n\t" | |
1082 "pand %3, %%mm1\n\t" | |
1083 "pand %4, %%mm2\n\t" | |
1084 "psllq $3, %%mm0\n\t" | |
1085 "psrlq $3, %%mm1\n\t" | |
1086 "psrlq $8, %%mm2\n\t" | |
1087 "movq %%mm0, %%mm3\n\t" | |
1088 "movq %%mm1, %%mm4\n\t" | |
1089 "movq %%mm2, %%mm5\n\t" | |
1090 "punpcklwd %5, %%mm0\n\t" | |
1091 "punpcklwd %5, %%mm1\n\t" | |
1092 "punpcklwd %5, %%mm2\n\t" | |
1093 "punpckhwd %5, %%mm3\n\t" | |
1094 "punpckhwd %5, %%mm4\n\t" | |
1095 "punpckhwd %5, %%mm5\n\t" | |
1096 "psllq $8, %%mm1\n\t" | |
1097 "psllq $16, %%mm2\n\t" | |
1098 "por %%mm1, %%mm0\n\t" | |
1099 "por %%mm2, %%mm0\n\t" | |
1100 "psllq $8, %%mm4\n\t" | |
1101 "psllq $16, %%mm5\n\t" | |
1102 "por %%mm4, %%mm3\n\t" | |
1103 "por %%mm5, %%mm3\n\t" | |
1104 :"=m"(*d) | |
1105 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |
1106 :"memory"); | |
1107 /* Borrowed 32 to 24 */ | |
1108 __asm __volatile( | |
1109 "movq %%mm0, %%mm4\n\t" | |
1110 "movq %%mm3, %%mm5\n\t" | |
1111 "movq %%mm6, %%mm0\n\t" | |
1112 "movq %%mm7, %%mm1\n\t" | |
1113 | |
1114 "movq %%mm4, %%mm6\n\t" | |
1115 "movq %%mm5, %%mm7\n\t" | |
1116 "movq %%mm0, %%mm2\n\t" | |
1117 "movq %%mm1, %%mm3\n\t" | |
1118 | |
1119 "psrlq $8, %%mm2\n\t" | |
1120 "psrlq $8, %%mm3\n\t" | |
1121 "psrlq $8, %%mm6\n\t" | |
1122 "psrlq $8, %%mm7\n\t" | |
1123 "pand %2, %%mm0\n\t" | |
1124 "pand %2, %%mm1\n\t" | |
1125 "pand %2, %%mm4\n\t" | |
1126 "pand %2, %%mm5\n\t" | |
1127 "pand %3, %%mm2\n\t" | |
1128 "pand %3, %%mm3\n\t" | |
1129 "pand %3, %%mm6\n\t" | |
1130 "pand %3, %%mm7\n\t" | |
1131 "por %%mm2, %%mm0\n\t" | |
1132 "por %%mm3, %%mm1\n\t" | |
1133 "por %%mm6, %%mm4\n\t" | |
1134 "por %%mm7, %%mm5\n\t" | |
1135 | |
1136 "movq %%mm1, %%mm2\n\t" | |
1137 "movq %%mm4, %%mm3\n\t" | |
1138 "psllq $48, %%mm2\n\t" | |
1139 "psllq $32, %%mm3\n\t" | |
1140 "pand %4, %%mm2\n\t" | |
1141 "pand %5, %%mm3\n\t" | |
1142 "por %%mm2, %%mm0\n\t" | |
1143 "psrlq $16, %%mm1\n\t" | |
1144 "psrlq $32, %%mm4\n\t" | |
1145 "psllq $16, %%mm5\n\t" | |
1146 "por %%mm3, %%mm1\n\t" | |
1147 "pand %6, %%mm5\n\t" | |
1148 "por %%mm5, %%mm4\n\t" | |
1149 | |
1150 MOVNTQ" %%mm0, %0\n\t" | |
1151 MOVNTQ" %%mm1, 8%0\n\t" | |
1152 MOVNTQ" %%mm4, 16%0" | |
1153 | |
1154 :"=m"(*d) | |
1155 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1156 :"memory"); | |
1157 d += 24; | |
1158 s += 8; | |
1159 } | |
1160 __asm __volatile(SFENCE:::"memory"); | |
1161 __asm __volatile(EMMS:::"memory"); | |
1162 #endif | |
1163 while(s < end) | |
1164 { | |
1165 register uint16_t bgr; | |
1166 bgr = *s++; | |
1167 *d++ = (bgr&0x1F)<<3; | |
1168 *d++ = (bgr&0x7E0)>>3; | |
1169 *d++ = (bgr&0xF800)>>8; | |
1170 } | |
1171 } | |
2718 | 1172 |
6492 | 1173 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
1174 { | |
1175 const uint16_t *end; | |
1176 #ifdef HAVE_MMX | |
1177 const uint16_t *mm_end; | |
1178 #endif | |
1179 uint8_t *d = (uint8_t *)dst; | |
1180 const uint16_t *s = (const uint16_t *)src; | |
1181 end = s + src_size/2; | |
1182 #ifdef HAVE_MMX | |
1183 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1184 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
6605 | 1185 mm_end = end - 3; |
6492 | 1186 while(s < mm_end) |
1187 { | |
1188 __asm __volatile( | |
1189 PREFETCH" 32%1\n\t" | |
1190 "movq %1, %%mm0\n\t" | |
1191 "movq %1, %%mm1\n\t" | |
1192 "movq %1, %%mm2\n\t" | |
1193 "pand %2, %%mm0\n\t" | |
1194 "pand %3, %%mm1\n\t" | |
1195 "pand %4, %%mm2\n\t" | |
1196 "psllq $3, %%mm0\n\t" | |
1197 "psrlq $2, %%mm1\n\t" | |
1198 "psrlq $7, %%mm2\n\t" | |
1199 "movq %%mm0, %%mm3\n\t" | |
1200 "movq %%mm1, %%mm4\n\t" | |
1201 "movq %%mm2, %%mm5\n\t" | |
1202 "punpcklwd %%mm7, %%mm0\n\t" | |
1203 "punpcklwd %%mm7, %%mm1\n\t" | |
1204 "punpcklwd %%mm7, %%mm2\n\t" | |
1205 "punpckhwd %%mm7, %%mm3\n\t" | |
1206 "punpckhwd %%mm7, %%mm4\n\t" | |
1207 "punpckhwd %%mm7, %%mm5\n\t" | |
1208 "psllq $8, %%mm1\n\t" | |
1209 "psllq $16, %%mm2\n\t" | |
1210 "por %%mm1, %%mm0\n\t" | |
1211 "por %%mm2, %%mm0\n\t" | |
1212 "psllq $8, %%mm4\n\t" | |
1213 "psllq $16, %%mm5\n\t" | |
1214 "por %%mm4, %%mm3\n\t" | |
1215 "por %%mm5, %%mm3\n\t" | |
1216 MOVNTQ" %%mm0, %0\n\t" | |
1217 MOVNTQ" %%mm3, 8%0\n\t" | |
1218 :"=m"(*d) | |
1219 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |
1220 :"memory"); | |
1221 d += 16; | |
1222 s += 4; | |
1223 } | |
1224 __asm __volatile(SFENCE:::"memory"); | |
1225 __asm __volatile(EMMS:::"memory"); | |
1226 #endif | |
1227 while(s < end) | |
1228 { | |
9430 | 1229 #if 0 //slightly slower on athlon |
1230 int bgr= *s++; | |
1231 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9); | |
1232 #else | |
1233 //FIXME this is very likely wrong for bigendian (and the following converters too) | |
6492 | 1234 register uint16_t bgr; |
1235 bgr = *s++; | |
1236 *d++ = (bgr&0x1F)<<3; | |
1237 *d++ = (bgr&0x3E0)>>2; | |
1238 *d++ = (bgr&0x7C00)>>7; | |
1239 *d++ = 0; | |
9430 | 1240 #endif |
2718 | 1241 } |
6492 | 1242 } |
1243 | |
1244 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
1245 { | |
1246 const uint16_t *end; | |
1247 #ifdef HAVE_MMX | |
1248 const uint16_t *mm_end; | |
2741 | 1249 #endif |
6492 | 1250 uint8_t *d = (uint8_t *)dst; |
1251 const uint16_t *s = (uint16_t *)src; | |
1252 end = s + src_size/2; | |
1253 #ifdef HAVE_MMX | |
1254 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1255 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
6605 | 1256 mm_end = end - 3; |
6492 | 1257 while(s < mm_end) |
1258 { | |
1259 __asm __volatile( | |
1260 PREFETCH" 32%1\n\t" | |
1261 "movq %1, %%mm0\n\t" | |
1262 "movq %1, %%mm1\n\t" | |
1263 "movq %1, %%mm2\n\t" | |
1264 "pand %2, %%mm0\n\t" | |
1265 "pand %3, %%mm1\n\t" | |
1266 "pand %4, %%mm2\n\t" | |
1267 "psllq $3, %%mm0\n\t" | |
1268 "psrlq $3, %%mm1\n\t" | |
1269 "psrlq $8, %%mm2\n\t" | |
1270 "movq %%mm0, %%mm3\n\t" | |
1271 "movq %%mm1, %%mm4\n\t" | |
1272 "movq %%mm2, %%mm5\n\t" | |
1273 "punpcklwd %%mm7, %%mm0\n\t" | |
1274 "punpcklwd %%mm7, %%mm1\n\t" | |
1275 "punpcklwd %%mm7, %%mm2\n\t" | |
1276 "punpckhwd %%mm7, %%mm3\n\t" | |
1277 "punpckhwd %%mm7, %%mm4\n\t" | |
1278 "punpckhwd %%mm7, %%mm5\n\t" | |
1279 "psllq $8, %%mm1\n\t" | |
1280 "psllq $16, %%mm2\n\t" | |
1281 "por %%mm1, %%mm0\n\t" | |
1282 "por %%mm2, %%mm0\n\t" | |
1283 "psllq $8, %%mm4\n\t" | |
1284 "psllq $16, %%mm5\n\t" | |
1285 "por %%mm4, %%mm3\n\t" | |
1286 "por %%mm5, %%mm3\n\t" | |
1287 MOVNTQ" %%mm0, %0\n\t" | |
1288 MOVNTQ" %%mm3, 8%0\n\t" | |
1289 :"=m"(*d) | |
1290 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |
1291 :"memory"); | |
1292 d += 16; | |
1293 s += 4; | |
1294 } | |
1295 __asm __volatile(SFENCE:::"memory"); | |
1296 __asm __volatile(EMMS:::"memory"); | |
1297 #endif | |
1298 while(s < end) | |
1299 { | |
1300 register uint16_t bgr; | |
1301 bgr = *s++; | |
1302 *d++ = (bgr&0x1F)<<3; | |
1303 *d++ = (bgr&0x7E0)>>3; | |
1304 *d++ = (bgr&0xF800)>>8; | |
1305 *d++ = 0; | |
1306 } | |
2718 | 1307 } |
2694 | 1308 |
3132 | 1309 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
2755 | 1310 { |
1311 #ifdef HAVE_MMX | |
6492 | 1312 /* TODO: unroll this loop */ |
2755 | 1313 asm volatile ( |
1314 "xorl %%eax, %%eax \n\t" | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1315 ".balign 16 \n\t" |
2755 | 1316 "1: \n\t" |
1317 PREFETCH" 32(%0, %%eax) \n\t" | |
1318 "movq (%0, %%eax), %%mm0 \n\t" | |
1319 "movq %%mm0, %%mm1 \n\t" | |
1320 "movq %%mm0, %%mm2 \n\t" | |
1321 "pslld $16, %%mm0 \n\t" | |
1322 "psrld $16, %%mm1 \n\t" | |
6492 | 1323 "pand "MANGLE(mask32r)", %%mm0 \n\t" |
1324 "pand "MANGLE(mask32g)", %%mm2 \n\t" | |
1325 "pand "MANGLE(mask32b)", %%mm1 \n\t" | |
2755 | 1326 "por %%mm0, %%mm2 \n\t" |
1327 "por %%mm1, %%mm2 \n\t" | |
1328 MOVNTQ" %%mm2, (%1, %%eax) \n\t" | |
6096 | 1329 "addl $8, %%eax \n\t" |
2755 | 1330 "cmpl %2, %%eax \n\t" |
1331 " jb 1b \n\t" | |
6605 | 1332 :: "r" (src), "r"(dst), "r" (src_size-7) |
2755 | 1333 : "%eax" |
1334 ); | |
2766 | 1335 |
1336 __asm __volatile(SFENCE:::"memory"); | |
1337 __asm __volatile(EMMS:::"memory"); | |
2755 | 1338 #else |
6492 | 1339 unsigned i; |
1340 unsigned num_pixels = src_size >> 2; | |
2755 | 1341 for(i=0; i<num_pixels; i++) |
1342 { | |
9988
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1343 #ifdef WORDS_BIGENDIAN |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1344 dst[4*i + 1] = src[4*i + 3]; |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1345 dst[4*i + 2] = src[4*i + 2]; |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1346 dst[4*i + 3] = src[4*i + 1]; |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1347 #else |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1348 dst[4*i + 0] = src[4*i + 2]; |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1349 dst[4*i + 1] = src[4*i + 1]; |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1350 dst[4*i + 2] = src[4*i + 0]; |
a32fb6812221
bigendian fix by (Samuel Kleiner <kleiner at cd dot chalmers dot se>)
michael
parents:
9987
diff
changeset
|
1351 #endif |
2755 | 1352 } |
1353 #endif | |
1354 } | |
1355 | |
5582 | 1356 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
1357 { | |
6492 | 1358 unsigned i; |
5582 | 1359 #ifdef HAVE_MMX |
1360 int mmx_size= 23 - src_size; | |
1361 asm volatile ( | |
1362 "movq "MANGLE(mask24r)", %%mm5 \n\t" | |
1363 "movq "MANGLE(mask24g)", %%mm6 \n\t" | |
1364 "movq "MANGLE(mask24b)", %%mm7 \n\t" | |
1365 ".balign 16 \n\t" | |
1366 "1: \n\t" | |
1367 PREFETCH" 32(%1, %%eax) \n\t" | |
1368 "movq (%1, %%eax), %%mm0 \n\t" // BGR BGR BG | |
1369 "movq (%1, %%eax), %%mm1 \n\t" // BGR BGR BG | |
1370 "movq 2(%1, %%eax), %%mm2 \n\t" // R BGR BGR B | |
1371 "psllq $16, %%mm0 \n\t" // 00 BGR BGR | |
1372 "pand %%mm5, %%mm0 \n\t" | |
1373 "pand %%mm6, %%mm1 \n\t" | |
1374 "pand %%mm7, %%mm2 \n\t" | |
1375 "por %%mm0, %%mm1 \n\t" | |
1376 "por %%mm2, %%mm1 \n\t" | |
1377 "movq 6(%1, %%eax), %%mm0 \n\t" // BGR BGR BG | |
1378 MOVNTQ" %%mm1, (%2, %%eax) \n\t" // RGB RGB RG | |
1379 "movq 8(%1, %%eax), %%mm1 \n\t" // R BGR BGR B | |
1380 "movq 10(%1, %%eax), %%mm2 \n\t" // GR BGR BGR | |
1381 "pand %%mm7, %%mm0 \n\t" | |
1382 "pand %%mm5, %%mm1 \n\t" | |
1383 "pand %%mm6, %%mm2 \n\t" | |
1384 "por %%mm0, %%mm1 \n\t" | |
1385 "por %%mm2, %%mm1 \n\t" | |
1386 "movq 14(%1, %%eax), %%mm0 \n\t" // R BGR BGR B | |
1387 MOVNTQ" %%mm1, 8(%2, %%eax) \n\t" // B RGB RGB R | |
1388 "movq 16(%1, %%eax), %%mm1 \n\t" // GR BGR BGR | |
1389 "movq 18(%1, %%eax), %%mm2 \n\t" // BGR BGR BG | |
1390 "pand %%mm6, %%mm0 \n\t" | |
1391 "pand %%mm7, %%mm1 \n\t" | |
1392 "pand %%mm5, %%mm2 \n\t" | |
1393 "por %%mm0, %%mm1 \n\t" | |
1394 "por %%mm2, %%mm1 \n\t" | |
1395 MOVNTQ" %%mm1, 16(%2, %%eax) \n\t" | |
1396 "addl $24, %%eax \n\t" | |
1397 " js 1b \n\t" | |
1398 : "+a" (mmx_size) | |
1399 : "r" (src-mmx_size), "r"(dst-mmx_size) | |
1400 ); | |
1401 | |
1402 __asm __volatile(SFENCE:::"memory"); | |
1403 __asm __volatile(EMMS:::"memory"); | |
1404 | |
6096 | 1405 if(mmx_size==23) return; //finihsed, was multiple of 8 |
6492 | 1406 |
5582 | 1407 src+= src_size; |
1408 dst+= src_size; | |
6492 | 1409 src_size= 23-mmx_size; |
5582 | 1410 src-= src_size; |
1411 dst-= src_size; | |
1412 #endif | |
1413 for(i=0; i<src_size; i+=3) | |
1414 { | |
6492 | 1415 register uint8_t x; |
5582 | 1416 x = src[i + 2]; |
1417 dst[i + 1] = src[i + 1]; | |
1418 dst[i + 2] = src[i + 0]; | |
1419 dst[i + 0] = x; | |
1420 } | |
1421 } | |
1422 | |
5588 | 1423 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
2725 | 1424 unsigned int width, unsigned int height, |
9392 | 1425 int lumStride, int chromStride, int dstStride, int vertLumPerChroma) |
2701 | 1426 { |
6492 | 1427 unsigned y; |
1428 const unsigned chromWidth= width>>1; | |
2723 | 1429 for(y=0; y<height; y++) |
1430 { | |
2702 | 1431 #ifdef HAVE_MMX |
2723 | 1432 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
1433 asm volatile( | |
1434 "xorl %%eax, %%eax \n\t" | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1435 ".balign 16 \n\t" |
2723 | 1436 "1: \n\t" |
1437 PREFETCH" 32(%1, %%eax, 2) \n\t" | |
1438 PREFETCH" 32(%2, %%eax) \n\t" | |
1439 PREFETCH" 32(%3, %%eax) \n\t" | |
1440 "movq (%2, %%eax), %%mm0 \n\t" // U(0) | |
1441 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1442 "movq (%3, %%eax), %%mm1 \n\t" // V(0) | |
1443 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1444 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
1445 | |
1446 "movq (%1, %%eax,2), %%mm3 \n\t" // Y(0) | |
1447 "movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8) | |
1448 "movq %%mm3, %%mm4 \n\t" // Y(0) | |
1449 "movq %%mm5, %%mm6 \n\t" // Y(8) | |
1450 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | |
1451 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | |
1452 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | |
1453 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | |
2702 | 1454 |
2723 | 1455 MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t" |
1456 MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t" | |
1457 MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t" | |
1458 MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t" | |
2702 | 1459 |
2723 | 1460 "addl $8, %%eax \n\t" |
1461 "cmpl %4, %%eax \n\t" | |
1462 " jb 1b \n\t" | |
9394 | 1463 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) |
2723 | 1464 : "%eax" |
1465 ); | |
2702 | 1466 #else |
9393
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1467 |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1468 #if defined ARCH_ALPHA && defined HAVE_MVI |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1469 #define pl2yuy2(n) \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1470 y1 = yc[n]; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1471 y2 = yc2[n]; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1472 u = uc[n]; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1473 v = vc[n]; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1474 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1475 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1476 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1477 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1478 yuv1 = (u << 8) + (v << 24); \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1479 yuv2 = yuv1 + y2; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1480 yuv1 += y1; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1481 qdst[n] = yuv1; \ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1482 qdst2[n] = yuv2; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1483 |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1484 int i; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1485 uint64_t *qdst = (uint64_t *) dst; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1486 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1487 const uint32_t *yc = (uint32_t *) ysrc; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1488 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1489 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1490 for(i = 0; i < chromWidth; i += 8){ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1491 uint64_t y1, y2, yuv1, yuv2; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1492 uint64_t u, v; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1493 /* Prefetch */ |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1494 asm("ldq $31,64(%0)" :: "r"(yc)); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1495 asm("ldq $31,64(%0)" :: "r"(yc2)); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1496 asm("ldq $31,64(%0)" :: "r"(uc)); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1497 asm("ldq $31,64(%0)" :: "r"(vc)); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1498 |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1499 pl2yuy2(0); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1500 pl2yuy2(1); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1501 pl2yuy2(2); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1502 pl2yuy2(3); |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1503 |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1504 yc += 4; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1505 yc2 += 4; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1506 uc += 4; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1507 vc += 4; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1508 qdst += 4; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1509 qdst2 += 4; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1510 } |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1511 y++; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1512 ysrc += lumStride; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1513 dst += dstStride; |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1514 |
5f9c97070b56
yv12 -> yuy2 converter in alpha asm (from mplayerxp)
michael
parents:
9392
diff
changeset
|
1515 #elif __WORDSIZE >= 64 |
2723 | 1516 int i; |
6492 | 1517 uint64_t *ldst = (uint64_t *) dst; |
1518 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1519 for(i = 0; i < chromWidth; i += 2){ | |
1520 uint64_t k, l; | |
1521 k = yc[0] + (uc[0] << 8) + | |
1522 (yc[1] << 16) + (vc[0] << 24); | |
1523 l = yc[2] + (uc[1] << 8) + | |
1524 (yc[3] << 16) + (vc[1] << 24); | |
1525 *ldst++ = k + (l << 32); | |
1526 yc += 4; | |
1527 uc += 2; | |
1528 vc += 2; | |
2723 | 1529 } |
6492 | 1530 |
1531 #else | |
1532 int i, *idst = (int32_t *) dst; | |
1533 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1534 for(i = 0; i < chromWidth; i++){ | |
1535 *idst++ = yc[0] + (uc[0] << 8) + | |
1536 (yc[1] << 16) + (vc[0] << 24); | |
1537 yc += 2; | |
1538 uc++; | |
1539 vc++; | |
1540 } | |
1541 #endif | |
2723 | 1542 #endif |
5588 | 1543 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) |
2723 | 1544 { |
1545 usrc += chromStride; | |
1546 vsrc += chromStride; | |
1547 } | |
1548 ysrc += lumStride; | |
1549 dst += dstStride; | |
2701 | 1550 } |
2723 | 1551 #ifdef HAVE_MMX |
1552 asm( EMMS" \n\t" | |
1553 SFENCE" \n\t" | |
1554 :::"memory"); | |
2702 | 1555 #endif |
2701 | 1556 } |
1557 | |
2724 | 1558 /** |
1559 * | |
1560 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1561 * problem for anyone then tell me, and ill fix it) | |
1562 */ | |
5588 | 1563 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1564 unsigned int width, unsigned int height, | |
9392 | 1565 int lumStride, int chromStride, int dstStride) |
5588 | 1566 { |
1567 //FIXME interpolate chroma | |
1568 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); | |
1569 } | |
1570 | |
1571 /** | |
1572 * | |
1573 * width should be a multiple of 16 | |
1574 */ | |
1575 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1576 unsigned int width, unsigned int height, | |
9392 | 1577 int lumStride, int chromStride, int dstStride) |
5588 | 1578 { |
1579 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); | |
1580 } | |
1581 | |
/**
 * Unpacks packed YUY2 (byte order Y0 U Y1 V) into three separate planes with
 * vertically subsampled chroma.
 *
 * Lines are consumed in pairs: the first line of each pair supplies luma AND
 * chroma, the second line supplies luma only (its chroma bytes are dropped).
 *
 * height should be a multiple of 2 and width should be a multiple of 16
 * (report it if this restriction is a problem and it will be fixed).
 * The MMX loops are bottom-tested and handle 16 pixels (32 source bytes) per
 * iteration, so they always run at least once per line.
 *
 * @param src         packed source picture
 * @param ydst        luma destination plane
 * @param udst        U destination plane
 * @param vdst        V destination plane
 * @param width       picture width in pixels
 * @param height      picture height in lines
 * @param lumStride   byte distance between two lines of ydst
 * @param chromStride byte distance between two lines of udst / vdst
 * @param srcStride   byte distance between two lines of src
 */
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
	unsigned int width, unsigned int height,
	int lumStride, int chromStride, int srcStride)
{
	unsigned y;
	const unsigned chromWidth= width>>1;	// chroma samples per line (one U and one V per two pixels)
	for(y=0; y<height; y+=2)
	{
#ifdef HAVE_MMX
		/* first line of the pair: split into Y, U and V */
		asm volatile(
			"xorl %%eax, %%eax \n\t"	// eax = chroma sample index
			"pcmpeqw %%mm7, %%mm7 \n\t"
			"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
			".balign 16 \n\t"
			"1: \n\t"
			PREFETCH" 64(%0, %%eax, 4) \n\t"
			"movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
			"movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4)
			"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
			"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
			"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
			"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
			"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
			"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
			"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
			"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)

			MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"

			"movq 16(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(8)
			"movq 24(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(12)
			"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
			"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
			"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
			"psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
			"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
			"pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
			"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
			"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)

			MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"

			/* separate the interleaved UV words into a V and a U quadword */
			"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
			"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
			"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
			"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
			"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
			"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
			"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
			"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)

			MOVNTQ" %%mm0, (%3, %%eax) \n\t"
			MOVNTQ" %%mm2, (%2, %%eax) \n\t"

			"addl $8, %%eax \n\t"	// 8 chroma samples == 16 pixels done
			"cmpl %4, %%eax \n\t"
			" jb 1b \n\t"
			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
			: "memory", "%eax"
		);

		ydst += lumStride;
		src += srcStride;

		/* second line of the pair: luma only.
		   NOTE: relies on mm7 still holding the 00FF word mask set up by the
		   previous asm statement; operands %2/%3 are passed but not used. */
		asm volatile(
			"xorl %%eax, %%eax \n\t"
			".balign 16 \n\t"
			"1: \n\t"
			PREFETCH" 64(%0, %%eax, 4) \n\t"
			"movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
			"movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4)
			"movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8)
			"movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12)
			"pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
			"pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
			"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
			"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
			"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
			"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)

			MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
			MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"

			"addl $8, %%eax \n\t"
			"cmpl %4, %%eax \n\t"
			" jb 1b \n\t"

			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
			: "memory", "%eax"
		);
#else
		unsigned i;
		/* first line of the pair: Y0 U Y1 V -> Y plane, U plane, V plane */
		for(i=0; i<chromWidth; i++)
		{
			ydst[2*i+0] = src[4*i+0];
			udst[i] = src[4*i+1];
			ydst[2*i+1] = src[4*i+2];
			vdst[i] = src[4*i+3];
		}
		ydst += lumStride;
		src += srcStride;

		/* second line of the pair: keep luma, drop chroma */
		for(i=0; i<chromWidth; i++)
		{
			ydst[2*i+0] = src[4*i+0];
			ydst[2*i+1] = src[4*i+2];
		}
#endif
		udst += chromStride;
		vdst += chromStride;
		ydst += lumStride;
		src += srcStride;
	}
#ifdef HAVE_MMX
	/* EMMS / SFENCE are macros defined outside this block; presumably they
	   leave MMX state and order the MOVNTQ stores -- confirm at their definition */
asm volatile( EMMS" \n\t"
	SFENCE" \n\t"
	:::"memory");
#endif
}
2801 | 1706 |
6484
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1707 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1708 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
9392 | 1709 unsigned int width, unsigned int height, int lumStride, int chromStride) |
6484
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1710 { |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1711 /* Y Plane */ |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1712 memcpy(ydst, ysrc, width*height); |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1713 |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1714 /* XXX: implement upscaling for U,V */ |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1715 } |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1716 |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1717 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1718 { |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1719 int x,y; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1720 |
9256 | 1721 dst[0]= src[0]; |
1722 | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1723 // first line |
9256 | 1724 for(x=0; x<srcWidth-1; x++){ |
1725 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | |
1726 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1727 } |
9256 | 1728 dst[2*srcWidth-1]= src[srcWidth-1]; |
1729 | |
1730 dst+= dstStride; | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1731 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1732 for(y=1; y<srcHeight; y++){ |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1733 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
9256 | 1734 const int mmxSize= srcWidth&~15; |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1735 asm volatile( |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1736 "movl %4, %%eax \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1737 "1: \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1738 "movq (%0, %%eax), %%mm0 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1739 "movq (%1, %%eax), %%mm1 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1740 "movq 1(%0, %%eax), %%mm2 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1741 "movq 1(%1, %%eax), %%mm3 \n\t" |
9256 | 1742 "movq -1(%0, %%eax), %%mm4 \n\t" |
1743 "movq -1(%1, %%eax), %%mm5 \n\t" | |
1744 PAVGB" %%mm0, %%mm5 \n\t" | |
1745 PAVGB" %%mm0, %%mm3 \n\t" | |
1746 PAVGB" %%mm0, %%mm5 \n\t" | |
1747 PAVGB" %%mm0, %%mm3 \n\t" | |
1748 PAVGB" %%mm1, %%mm4 \n\t" | |
1749 PAVGB" %%mm1, %%mm2 \n\t" | |
1750 PAVGB" %%mm1, %%mm4 \n\t" | |
1751 PAVGB" %%mm1, %%mm2 \n\t" | |
1752 "movq %%mm5, %%mm7 \n\t" | |
1753 "movq %%mm4, %%mm6 \n\t" | |
1754 "punpcklbw %%mm3, %%mm5 \n\t" | |
1755 "punpckhbw %%mm3, %%mm7 \n\t" | |
1756 "punpcklbw %%mm2, %%mm4 \n\t" | |
1757 "punpckhbw %%mm2, %%mm6 \n\t" | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1758 #if 1 |
9256 | 1759 MOVNTQ" %%mm5, (%2, %%eax, 2) \n\t" |
1760 MOVNTQ" %%mm7, 8(%2, %%eax, 2) \n\t" | |
1761 MOVNTQ" %%mm4, (%3, %%eax, 2) \n\t" | |
1762 MOVNTQ" %%mm6, 8(%3, %%eax, 2) \n\t" | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1763 #else |
9256 | 1764 "movq %%mm5, (%2, %%eax, 2) \n\t" |
1765 "movq %%mm7, 8(%2, %%eax, 2) \n\t" | |
1766 "movq %%mm4, (%3, %%eax, 2) \n\t" | |
1767 "movq %%mm6, 8(%3, %%eax, 2) \n\t" | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1768 #endif |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1769 "addl $8, %%eax \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1770 " js 1b \n\t" |
9256 | 1771 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1772 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1773 "g" (-mmxSize) |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1774 : "%eax" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1775 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1776 ); |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1777 #else |
9256 | 1778 const int mmxSize=1; |
1779 #endif | |
1780 dst[0 ]= (3*src[0] + src[srcStride])>>2; | |
1781 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1782 |
9256 | 1783 for(x=mmxSize-1; x<srcWidth-1; x++){ |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1784 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1785 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1786 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1787 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1788 } |
9256 | 1789 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; |
1790 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1791 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1792 dst+=dstStride*2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1793 src+=srcStride; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1794 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1795 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1796 // last line |
9256 | 1797 #if 1 |
1798 dst[0]= src[0]; | |
1799 | |
1800 for(x=0; x<srcWidth-1; x++){ | |
1801 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | |
1802 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | |
1803 } | |
1804 dst[2*srcWidth-1]= src[srcWidth-1]; | |
1805 #else | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1806 for(x=0; x<srcWidth; x++){ |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1807 dst[2*x+0]= |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1808 dst[2*x+1]= src[x]; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1809 } |
9256 | 1810 #endif |
1811 | |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1812 #ifdef HAVE_MMX |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1813 asm volatile( EMMS" \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1814 SFENCE" \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1815 :::"memory"); |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1816 #endif |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1817 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1818 |
/**
 * Unpacks packed UYVY (byte order U Y0 V Y1) into three separate planes with
 * vertically subsampled chroma.
 *
 * height should be a multiple of 2 and width should be a multiple of 16
 * (report it if this restriction is a problem and it will be fixed).
 * Chrominance data is only taken from every second line, the others are
 * ignored. FIXME write HQ version
 *
 * The MMX loops are bottom-tested and handle 16 pixels (32 source bytes) per
 * iteration, so they always run at least once per line.
 *
 * @param src         packed source picture
 * @param ydst        luma destination plane
 * @param udst        U destination plane
 * @param vdst        V destination plane
 * @param width       picture width in pixels
 * @param height      picture height in lines
 * @param lumStride   byte distance between two lines of ydst
 * @param chromStride byte distance between two lines of udst / vdst
 * @param srcStride   byte distance between two lines of src
 */
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
	unsigned int width, unsigned int height,
	int lumStride, int chromStride, int srcStride)
{
	unsigned y;
	const unsigned chromWidth= width>>1;	// chroma samples per line (one U and one V per two pixels)
	for(y=0; y<height; y+=2)
	{
#ifdef HAVE_MMX
		/* first line of the pair: split into Y, U and V */
		asm volatile(
			"xorl %%eax, %%eax \n\t"	// eax = chroma sample index
			"pcmpeqw %%mm7, %%mm7 \n\t"
			"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
			".balign 16 \n\t"
			"1: \n\t"
			PREFETCH" 64(%0, %%eax, 4) \n\t"
			"movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
			"movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
			"movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
			"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
			"pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
			"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
			"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
			"psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
			"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
			"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)

			MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"

			"movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8)
			"movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12)
			"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
			"movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
			"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
			"pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
			"psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
			"psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
			"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
			"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)

			MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"

			/* separate the interleaved UV words into a V and a U quadword */
			"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
			"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
			"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
			"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
			"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
			"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
			"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
			"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)

			MOVNTQ" %%mm0, (%3, %%eax) \n\t"
			MOVNTQ" %%mm2, (%2, %%eax) \n\t"

			"addl $8, %%eax \n\t"	// 8 chroma samples == 16 pixels done
			"cmpl %4, %%eax \n\t"
			" jb 1b \n\t"
			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
			: "memory", "%eax"
		);

		ydst += lumStride;
		src += srcStride;

		/* second line of the pair: luma only (the high byte of every word);
		   operands %2/%3 are passed but not used */
		asm volatile(
			"xorl %%eax, %%eax \n\t"
			".balign 16 \n\t"
			"1: \n\t"
			PREFETCH" 64(%0, %%eax, 4) \n\t"
			"movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
			"movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
			"movq 16(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(8)
			"movq 24(%0, %%eax, 4), %%mm3 \n\t" // UYVY UYVY(12)
			"psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
			"psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
			"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
			"psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
			"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
			"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)

			MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
			MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"

			"addl $8, %%eax \n\t"
			"cmpl %4, %%eax \n\t"
			" jb 1b \n\t"

			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
			: "memory", "%eax"
		);
#else
		unsigned i;
		/* first line of the pair: U Y0 V Y1 -> U plane, Y plane, V plane */
		for(i=0; i<chromWidth; i++)
		{
			udst[i] = src[4*i+0];
			ydst[2*i+0] = src[4*i+1];
			vdst[i] = src[4*i+2];
			ydst[2*i+1] = src[4*i+3];
		}
		ydst += lumStride;
		src += srcStride;

		/* second line of the pair: keep luma, drop chroma */
		for(i=0; i<chromWidth; i++)
		{
			ydst[2*i+0] = src[4*i+1];
			ydst[2*i+1] = src[4*i+3];
		}
#endif
		udst += chromStride;
		vdst += chromStride;
		ydst += lumStride;
		src += srcStride;
	}
#ifdef HAVE_MMX
	/* EMMS / SFENCE are macros defined outside this block; presumably they
	   leave MMX state and order the MOVNTQ stores -- confirm at their definition */
asm volatile( EMMS" \n\t"
	SFENCE" \n\t"
	:::"memory");
#endif
}
1944 | |
3132 | 1945 /** |
1946 * | |
1947 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a | |
1948 * problem for anyone then tell me, and ill fix it) | |
4622 | 1949 * chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version |
3132 | 1950 */ |
1951 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
1952 unsigned int width, unsigned int height, | |
9392 | 1953 int lumStride, int chromStride, int srcStride) |
3132 | 1954 { |
6492 | 1955 unsigned y; |
1956 const unsigned chromWidth= width>>1; | |
4622 | 1957 #ifdef HAVE_MMX |
1958 for(y=0; y<height-2; y+=2) | |
1959 { | |
6492 | 1960 unsigned i; |
4622 | 1961 for(i=0; i<2; i++) |
1962 { | |
1963 asm volatile( | |
1964 "movl %2, %%eax \n\t" | |
4923 | 1965 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1966 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
4622 | 1967 "pxor %%mm7, %%mm7 \n\t" |
1968 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1969 ".balign 16 \n\t" | |
1970 "1: \n\t" | |
1971 PREFETCH" 64(%0, %%ebx) \n\t" | |
1972 "movd (%0, %%ebx), %%mm0 \n\t" | |
1973 "movd 3(%0, %%ebx), %%mm1 \n\t" | |
1974 "punpcklbw %%mm7, %%mm0 \n\t" | |
1975 "punpcklbw %%mm7, %%mm1 \n\t" | |
1976 "movd 6(%0, %%ebx), %%mm2 \n\t" | |
1977 "movd 9(%0, %%ebx), %%mm3 \n\t" | |
1978 "punpcklbw %%mm7, %%mm2 \n\t" | |
1979 "punpcklbw %%mm7, %%mm3 \n\t" | |
1980 "pmaddwd %%mm6, %%mm0 \n\t" | |
1981 "pmaddwd %%mm6, %%mm1 \n\t" | |
1982 "pmaddwd %%mm6, %%mm2 \n\t" | |
1983 "pmaddwd %%mm6, %%mm3 \n\t" | |
1984 #ifndef FAST_BGR2YV12 | |
1985 "psrad $8, %%mm0 \n\t" | |
1986 "psrad $8, %%mm1 \n\t" | |
1987 "psrad $8, %%mm2 \n\t" | |
1988 "psrad $8, %%mm3 \n\t" | |
1989 #endif | |
1990 "packssdw %%mm1, %%mm0 \n\t" | |
1991 "packssdw %%mm3, %%mm2 \n\t" | |
1992 "pmaddwd %%mm5, %%mm0 \n\t" | |
1993 "pmaddwd %%mm5, %%mm2 \n\t" | |
1994 "packssdw %%mm2, %%mm0 \n\t" | |
1995 "psraw $7, %%mm0 \n\t" | |
1996 | |
1997 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1998 "movd 15(%0, %%ebx), %%mm1 \n\t" | |
1999 "punpcklbw %%mm7, %%mm4 \n\t" | |
2000 "punpcklbw %%mm7, %%mm1 \n\t" | |
2001 "movd 18(%0, %%ebx), %%mm2 \n\t" | |
2002 "movd 21(%0, %%ebx), %%mm3 \n\t" | |
2003 "punpcklbw %%mm7, %%mm2 \n\t" | |
2004 "punpcklbw %%mm7, %%mm3 \n\t" | |
2005 "pmaddwd %%mm6, %%mm4 \n\t" | |
2006 "pmaddwd %%mm6, %%mm1 \n\t" | |
2007 "pmaddwd %%mm6, %%mm2 \n\t" | |
2008 "pmaddwd %%mm6, %%mm3 \n\t" | |
2009 #ifndef FAST_BGR2YV12 | |
2010 "psrad $8, %%mm4 \n\t" | |
2011 "psrad $8, %%mm1 \n\t" | |
2012 "psrad $8, %%mm2 \n\t" | |
2013 "psrad $8, %%mm3 \n\t" | |
2014 #endif | |
2015 "packssdw %%mm1, %%mm4 \n\t" | |
2016 "packssdw %%mm3, %%mm2 \n\t" | |
2017 "pmaddwd %%mm5, %%mm4 \n\t" | |
2018 "pmaddwd %%mm5, %%mm2 \n\t" | |
2019 "addl $24, %%ebx \n\t" | |
2020 "packssdw %%mm2, %%mm4 \n\t" | |
2021 "psraw $7, %%mm4 \n\t" | |
2022 | |
2023 "packuswb %%mm4, %%mm0 \n\t" | |
4923 | 2024 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
4622 | 2025 |
2026 MOVNTQ" %%mm0, (%1, %%eax) \n\t" | |
2027 "addl $8, %%eax \n\t" | |
2028 " js 1b \n\t" | |
2029 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) | |
2030 : "%eax", "%ebx" | |
2031 ); | |
2032 ydst += lumStride; | |
2033 src += srcStride; | |
2034 } | |
2035 src -= srcStride*2; | |
2036 asm volatile( | |
2037 "movl %4, %%eax \n\t" | |
4923 | 2038 "movq "MANGLE(w1111)", %%mm5 \n\t" |
2039 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
4622 | 2040 "pxor %%mm7, %%mm7 \n\t" |
2041 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
2042 "addl %%ebx, %%ebx \n\t" | |
2043 ".balign 16 \n\t" | |
2044 "1: \n\t" | |
2045 PREFETCH" 64(%0, %%ebx) \n\t" | |
2046 PREFETCH" 64(%1, %%ebx) \n\t" | |
2047 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
2048 "movq (%0, %%ebx), %%mm0 \n\t" | |
2049 "movq (%1, %%ebx), %%mm1 \n\t" | |
2050 "movq 6(%0, %%ebx), %%mm2 \n\t" | |
2051 "movq 6(%1, %%ebx), %%mm3 \n\t" | |
2052 PAVGB" %%mm1, %%mm0 \n\t" | |
2053 PAVGB" %%mm3, %%mm2 \n\t" | |
2054 "movq %%mm0, %%mm1 \n\t" | |
2055 "movq %%mm2, %%mm3 \n\t" | |
2056 "psrlq $24, %%mm0 \n\t" | |
2057 "psrlq $24, %%mm2 \n\t" | |
2058 PAVGB" %%mm1, %%mm0 \n\t" | |
2059 PAVGB" %%mm3, %%mm2 \n\t" | |
2060 "punpcklbw %%mm7, %%mm0 \n\t" | |
2061 "punpcklbw %%mm7, %%mm2 \n\t" | |
2062 #else | |
2063 "movd (%0, %%ebx), %%mm0 \n\t" | |
2064 "movd (%1, %%ebx), %%mm1 \n\t" | |
2065 "movd 3(%0, %%ebx), %%mm2 \n\t" | |
2066 "movd 3(%1, %%ebx), %%mm3 \n\t" | |
2067 "punpcklbw %%mm7, %%mm0 \n\t" | |
2068 "punpcklbw %%mm7, %%mm1 \n\t" | |
2069 "punpcklbw %%mm7, %%mm2 \n\t" | |
2070 "punpcklbw %%mm7, %%mm3 \n\t" | |
2071 "paddw %%mm1, %%mm0 \n\t" | |
2072 "paddw %%mm3, %%mm2 \n\t" | |
2073 "paddw %%mm2, %%mm0 \n\t" | |
2074 "movd 6(%0, %%ebx), %%mm4 \n\t" | |
2075 "movd 6(%1, %%ebx), %%mm1 \n\t" | |
2076 "movd 9(%0, %%ebx), %%mm2 \n\t" | |
2077 "movd 9(%1, %%ebx), %%mm3 \n\t" | |
2078 "punpcklbw %%mm7, %%mm4 \n\t" | |
2079 "punpcklbw %%mm7, %%mm1 \n\t" | |
2080 "punpcklbw %%mm7, %%mm2 \n\t" | |
2081 "punpcklbw %%mm7, %%mm3 \n\t" | |
2082 "paddw %%mm1, %%mm4 \n\t" | |
2083 "paddw %%mm3, %%mm2 \n\t" | |
2084 "paddw %%mm4, %%mm2 \n\t" | |
2085 "psrlw $2, %%mm0 \n\t" | |
2086 "psrlw $2, %%mm2 \n\t" | |
2087 #endif | |
4923 | 2088 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
2089 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4622 | 2090 |
2091 "pmaddwd %%mm0, %%mm1 \n\t" | |
2092 "pmaddwd %%mm2, %%mm3 \n\t" | |
2093 "pmaddwd %%mm6, %%mm0 \n\t" | |
2094 "pmaddwd %%mm6, %%mm2 \n\t" | |
2095 #ifndef FAST_BGR2YV12 | |
2096 "psrad $8, %%mm0 \n\t" | |
2097 "psrad $8, %%mm1 \n\t" | |
2098 "psrad $8, %%mm2 \n\t" | |
2099 "psrad $8, %%mm3 \n\t" | |
2100 #endif | |
2101 "packssdw %%mm2, %%mm0 \n\t" | |
2102 "packssdw %%mm3, %%mm1 \n\t" | |
2103 "pmaddwd %%mm5, %%mm0 \n\t" | |
2104 "pmaddwd %%mm5, %%mm1 \n\t" | |
2105 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
2106 "psraw $7, %%mm0 \n\t" | |
2107 | |
2108 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
2109 "movq 12(%0, %%ebx), %%mm4 \n\t" | |
2110 "movq 12(%1, %%ebx), %%mm1 \n\t" | |
2111 "movq 18(%0, %%ebx), %%mm2 \n\t" | |
2112 "movq 18(%1, %%ebx), %%mm3 \n\t" | |
2113 PAVGB" %%mm1, %%mm4 \n\t" | |
2114 PAVGB" %%mm3, %%mm2 \n\t" | |
2115 "movq %%mm4, %%mm1 \n\t" | |
2116 "movq %%mm2, %%mm3 \n\t" | |
2117 "psrlq $24, %%mm4 \n\t" | |
2118 "psrlq $24, %%mm2 \n\t" | |
2119 PAVGB" %%mm1, %%mm4 \n\t" | |
2120 PAVGB" %%mm3, %%mm2 \n\t" | |
2121 "punpcklbw %%mm7, %%mm4 \n\t" | |
2122 "punpcklbw %%mm7, %%mm2 \n\t" | |
2123 #else | |
2124 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
2125 "movd 12(%1, %%ebx), %%mm1 \n\t" | |
2126 "movd 15(%0, %%ebx), %%mm2 \n\t" | |
2127 "movd 15(%1, %%ebx), %%mm3 \n\t" | |
2128 "punpcklbw %%mm7, %%mm4 \n\t" | |
2129 "punpcklbw %%mm7, %%mm1 \n\t" | |
2130 "punpcklbw %%mm7, %%mm2 \n\t" | |
2131 "punpcklbw %%mm7, %%mm3 \n\t" | |
2132 "paddw %%mm1, %%mm4 \n\t" | |
2133 "paddw %%mm3, %%mm2 \n\t" | |
2134 "paddw %%mm2, %%mm4 \n\t" | |
2135 "movd 18(%0, %%ebx), %%mm5 \n\t" | |
2136 "movd 18(%1, %%ebx), %%mm1 \n\t" | |
2137 "movd 21(%0, %%ebx), %%mm2 \n\t" | |
2138 "movd 21(%1, %%ebx), %%mm3 \n\t" | |
2139 "punpcklbw %%mm7, %%mm5 \n\t" | |
2140 "punpcklbw %%mm7, %%mm1 \n\t" | |
2141 "punpcklbw %%mm7, %%mm2 \n\t" | |
2142 "punpcklbw %%mm7, %%mm3 \n\t" | |
2143 "paddw %%mm1, %%mm5 \n\t" | |
2144 "paddw %%mm3, %%mm2 \n\t" | |
2145 "paddw %%mm5, %%mm2 \n\t" | |
4923 | 2146 "movq "MANGLE(w1111)", %%mm5 \n\t" |
4622 | 2147 "psrlw $2, %%mm4 \n\t" |
2148 "psrlw $2, %%mm2 \n\t" | |
2149 #endif | |
4923 | 2150 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
2151 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4622 | 2152 |
2153 "pmaddwd %%mm4, %%mm1 \n\t" | |
2154 "pmaddwd %%mm2, %%mm3 \n\t" | |
2155 "pmaddwd %%mm6, %%mm4 \n\t" | |
2156 "pmaddwd %%mm6, %%mm2 \n\t" | |
2157 #ifndef FAST_BGR2YV12 | |
2158 "psrad $8, %%mm4 \n\t" | |
2159 "psrad $8, %%mm1 \n\t" | |
2160 "psrad $8, %%mm2 \n\t" | |
2161 "psrad $8, %%mm3 \n\t" | |
2162 #endif | |
2163 "packssdw %%mm2, %%mm4 \n\t" | |
2164 "packssdw %%mm3, %%mm1 \n\t" | |
2165 "pmaddwd %%mm5, %%mm4 \n\t" | |
2166 "pmaddwd %%mm5, %%mm1 \n\t" | |
2167 "addl $24, %%ebx \n\t" | |
2168 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
2169 "psraw $7, %%mm4 \n\t" | |
2170 | |
2171 "movq %%mm0, %%mm1 \n\t" | |
2172 "punpckldq %%mm4, %%mm0 \n\t" | |
2173 "punpckhdq %%mm4, %%mm1 \n\t" | |
2174 "packsswb %%mm1, %%mm0 \n\t" | |
4923 | 2175 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
4622 | 2176 |
2177 "movd %%mm0, (%2, %%eax) \n\t" | |
2178 "punpckhdq %%mm0, %%mm0 \n\t" | |
2179 "movd %%mm0, (%3, %%eax) \n\t" | |
2180 "addl $4, %%eax \n\t" | |
2181 " js 1b \n\t" | |
2182 : : "r" (src+width*6), "r" (src+srcStride+width*6), "r" (udst+width), "r" (vdst+width), "g" (-width) | |
2183 : "%eax", "%ebx" | |
2184 ); | |
2185 | |
2186 udst += chromStride; | |
2187 vdst += chromStride; | |
2188 src += srcStride*2; | |
2189 } | |
2190 | |
2191 asm volatile( EMMS" \n\t" | |
2192 SFENCE" \n\t" | |
2193 :::"memory"); | |
2194 #else | |
2195 y=0; | |
2196 #endif | |
2197 for(; y<height; y+=2) | |
3132 | 2198 { |
6492 | 2199 unsigned i; |
3132 | 2200 for(i=0; i<chromWidth; i++) |
2201 { | |
2202 unsigned int b= src[6*i+0]; | |
2203 unsigned int g= src[6*i+1]; | |
2204 unsigned int r= src[6*i+2]; | |
2801 | 2205 |
3633 | 2206 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2207 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; | |
2208 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; | |
3132 | 2209 |
2210 udst[i] = U; | |
2211 vdst[i] = V; | |
2212 ydst[2*i] = Y; | |
2213 | |
2214 b= src[6*i+3]; | |
2215 g= src[6*i+4]; | |
2216 r= src[6*i+5]; | |
2217 | |
3633 | 2218 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
3132 | 2219 ydst[2*i+1] = Y; |
2220 } | |
2221 ydst += lumStride; | |
2222 src += srcStride; | |
2223 | |
2224 for(i=0; i<chromWidth; i++) | |
2225 { | |
2226 unsigned int b= src[6*i+0]; | |
2227 unsigned int g= src[6*i+1]; | |
2228 unsigned int r= src[6*i+2]; | |
2229 | |
3633 | 2230 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
3132 | 2231 |
2232 ydst[2*i] = Y; | |
2233 | |
2234 b= src[6*i+3]; | |
2235 g= src[6*i+4]; | |
2236 r= src[6*i+5]; | |
2237 | |
3633 | 2238 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
3132 | 2239 ydst[2*i+1] = Y; |
2240 } | |
2241 udst += chromStride; | |
2242 vdst += chromStride; | |
2243 ydst += lumStride; | |
2244 src += srcStride; | |
2245 } | |
2246 } | |
5337 | 2247 |
2248 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | |
9392 | 2249 unsigned width, unsigned height, int src1Stride, |
2250 int src2Stride, int dstStride){ | |
6492 | 2251 unsigned h; |
5337 | 2252 |
2253 for(h=0; h < height; h++) | |
2254 { | |
6492 | 2255 unsigned w; |
5337 | 2256 |
2257 #ifdef HAVE_MMX | |
2258 #ifdef HAVE_SSE2 | |
2259 asm( | |
2260 "xorl %%eax, %%eax \n\t" | |
2261 "1: \n\t" | |
2262 PREFETCH" 64(%1, %%eax) \n\t" | |
2263 PREFETCH" 64(%2, %%eax) \n\t" | |
2264 "movdqa (%1, %%eax), %%xmm0 \n\t" | |
2265 "movdqa (%1, %%eax), %%xmm1 \n\t" | |
2266 "movdqa (%2, %%eax), %%xmm2 \n\t" | |
2267 "punpcklbw %%xmm2, %%xmm0 \n\t" | |
2268 "punpckhbw %%xmm2, %%xmm1 \n\t" | |
2269 "movntdq %%xmm0, (%0, %%eax, 2) \n\t" | |
2270 "movntdq %%xmm1, 16(%0, %%eax, 2)\n\t" | |
2271 "addl $16, %%eax \n\t" | |
2272 "cmpl %3, %%eax \n\t" | |
2273 " jb 1b \n\t" | |
2274 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |
2275 : "memory", "%eax" | |
2276 ); | |
2277 #else | |
2278 asm( | |
2279 "xorl %%eax, %%eax \n\t" | |
2280 "1: \n\t" | |
2281 PREFETCH" 64(%1, %%eax) \n\t" | |
2282 PREFETCH" 64(%2, %%eax) \n\t" | |
2283 "movq (%1, %%eax), %%mm0 \n\t" | |
2284 "movq 8(%1, %%eax), %%mm2 \n\t" | |
2285 "movq %%mm0, %%mm1 \n\t" | |
2286 "movq %%mm2, %%mm3 \n\t" | |
2287 "movq (%2, %%eax), %%mm4 \n\t" | |
2288 "movq 8(%2, %%eax), %%mm5 \n\t" | |
2289 "punpcklbw %%mm4, %%mm0 \n\t" | |
2290 "punpckhbw %%mm4, %%mm1 \n\t" | |
2291 "punpcklbw %%mm5, %%mm2 \n\t" | |
2292 "punpckhbw %%mm5, %%mm3 \n\t" | |
2293 MOVNTQ" %%mm0, (%0, %%eax, 2) \n\t" | |
2294 MOVNTQ" %%mm1, 8(%0, %%eax, 2) \n\t" | |
2295 MOVNTQ" %%mm2, 16(%0, %%eax, 2) \n\t" | |
2296 MOVNTQ" %%mm3, 24(%0, %%eax, 2) \n\t" | |
2297 "addl $16, %%eax \n\t" | |
2298 "cmpl %3, %%eax \n\t" | |
2299 " jb 1b \n\t" | |
2300 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |
2301 : "memory", "%eax" | |
2302 ); | |
2303 #endif | |
2304 for(w= (width&(~15)); w < width; w++) | |
2305 { | |
2306 dest[2*w+0] = src1[w]; | |
2307 dest[2*w+1] = src2[w]; | |
2308 } | |
2309 #else | |
2310 for(w=0; w < width; w++) | |
2311 { | |
2312 dest[2*w+0] = src1[w]; | |
2313 dest[2*w+1] = src2[w]; | |
2314 } | |
2315 #endif | |
2316 dest += dstStride; | |
2317 src1 += src1Stride; | |
2318 src2 += src2Stride; | |
2319 } | |
2320 #ifdef HAVE_MMX | |
2321 asm( | |
2322 EMMS" \n\t" | |
2323 SFENCE" \n\t" | |
2324 ::: "memory" | |
2325 ); | |
2326 #endif | |
2327 } | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2328 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2329 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2330 uint8_t *dst1, uint8_t *dst2, |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2331 unsigned width, unsigned height, |
9392 | 2332 int srcStride1, int srcStride2, |
2333 int dstStride1, int dstStride2) | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2334 { |
9392 | 2335 unsigned int y,x,h; |
2336 int w; | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2337 w=width/2; h=height/2; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2338 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2339 asm volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2340 PREFETCH" %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2341 PREFETCH" %1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2342 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2343 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2344 for(y=0;y<h;y++){ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2345 const uint8_t* s1=src1+srcStride1*(y>>1); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2346 uint8_t* d=dst1+dstStride1*y; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2347 x=0; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2348 #ifdef HAVE_MMX |
9392 | 2349 for(;x<w-31;x+=32) |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2350 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2351 asm volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2352 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2353 "movq %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2354 "movq 8%1, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2355 "movq 16%1, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2356 "movq 24%1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2357 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2358 "movq %%mm2, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2359 "movq %%mm4, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2360 "movq %%mm6, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2361 "punpcklbw %%mm0, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2362 "punpckhbw %%mm1, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2363 "punpcklbw %%mm2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2364 "punpckhbw %%mm3, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2365 "punpcklbw %%mm4, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2366 "punpckhbw %%mm5, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2367 "punpcklbw %%mm6, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2368 "punpckhbw %%mm7, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2369 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2370 MOVNTQ" %%mm1, 8%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2371 MOVNTQ" %%mm2, 16%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2372 MOVNTQ" %%mm3, 24%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2373 MOVNTQ" %%mm4, 32%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2374 MOVNTQ" %%mm5, 40%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2375 MOVNTQ" %%mm6, 48%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2376 MOVNTQ" %%mm7, 56%0" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2377 :"=m"(d[2*x]) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2378 :"m"(s1[x]) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2379 :"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2380 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2381 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2382 for(;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2383 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2384 for(y=0;y<h;y++){ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2385 const uint8_t* s2=src2+srcStride2*(y>>1); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2386 uint8_t* d=dst2+dstStride2*y; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2387 x=0; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2388 #ifdef HAVE_MMX |
9392 | 2389 for(;x<w-31;x+=32) |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2390 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2391 asm volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2392 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2393 "movq %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2394 "movq 8%1, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2395 "movq 16%1, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2396 "movq 24%1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2397 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2398 "movq %%mm2, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2399 "movq %%mm4, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2400 "movq %%mm6, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2401 "punpcklbw %%mm0, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2402 "punpckhbw %%mm1, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2403 "punpcklbw %%mm2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2404 "punpckhbw %%mm3, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2405 "punpcklbw %%mm4, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2406 "punpckhbw %%mm5, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2407 "punpcklbw %%mm6, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2408 "punpckhbw %%mm7, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2409 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2410 MOVNTQ" %%mm1, 8%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2411 MOVNTQ" %%mm2, 16%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2412 MOVNTQ" %%mm3, 24%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2413 MOVNTQ" %%mm4, 32%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2414 MOVNTQ" %%mm5, 40%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2415 MOVNTQ" %%mm6, 48%0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2416 MOVNTQ" %%mm7, 56%0" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2417 :"=m"(d[2*x]) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2418 :"m"(s2[x]) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2419 :"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2420 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2421 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2422 for(;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2423 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2424 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2425 asm( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2426 EMMS" \n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2427 SFENCE" \n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2428 ::: "memory" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2429 ); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2430 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2431 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2432 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2433 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2434 uint8_t *dst, |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2435 unsigned width, unsigned height, |
9392 | 2436 int srcStride1, int srcStride2, |
2437 int srcStride3, int dstStride) | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2438 { |
9394 | 2439 unsigned y,x,w,h; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2440 w=width/2; h=height; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2441 for(y=0;y<h;y++){ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2442 const uint8_t* yp=src1+srcStride1*y; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2443 const uint8_t* up=src2+srcStride2*(y>>2); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2444 const uint8_t* vp=src3+srcStride3*(y>>2); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2445 uint8_t* d=dst+dstStride*y; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2446 x=0; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2447 #ifdef HAVE_MMX |
9394 | 2448 for(;x<w-7;x+=8) |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2449 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2450 asm volatile( |
9394 | 2451 PREFETCH" 32(%1, %0)\n\t" |
2452 PREFETCH" 32(%2, %0)\n\t" | |
2453 PREFETCH" 32(%3, %0)\n\t" | |
2454 "movq (%1, %0, 4), %%mm0\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | |
2455 "movq (%2, %0), %%mm1\n\t" /* U0U1U2U3U4U5U6U7 */ | |
2456 "movq (%3, %0), %%mm2\n\t" /* V0V1V2V3V4V5V6V7 */ | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2457 "movq %%mm0, %%mm3\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2458 "movq %%mm1, %%mm4\n\t" /* U0U1U2U3U4U5U6U7 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2459 "movq %%mm2, %%mm5\n\t" /* V0V1V2V3V4V5V6V7 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2460 "punpcklbw %%mm1, %%mm1\n\t" /* U0U0 U1U1 U2U2 U3U3 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2461 "punpcklbw %%mm2, %%mm2\n\t" /* V0V0 V1V1 V2V2 V3V3 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2462 "punpckhbw %%mm4, %%mm4\n\t" /* U4U4 U5U5 U6U6 U7U7 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2463 "punpckhbw %%mm5, %%mm5\n\t" /* V4V4 V5V5 V6V6 V7V7 */ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2464 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2465 "movq %%mm1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2466 "punpcklbw %%mm2, %%mm1\n\t" /* U0V0 U0V0 U1V1 U1V1*/ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2467 "punpcklbw %%mm1, %%mm0\n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2468 "punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ |
9394 | 2469 MOVNTQ" %%mm0, (%4, %0, 8)\n\t" |
2470 MOVNTQ" %%mm3, 8(%4, %0, 8)\n\t" | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2471 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2472 "punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/ |
9394 | 2473 "movq 8(%1, %0, 4), %%mm0\n\t" |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2474 "movq %%mm0, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2475 "punpcklbw %%mm6, %%mm0\n\t" /* Y U2 Y V2 Y U2 Y V2*/ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2476 "punpckhbw %%mm6, %%mm3\n\t" /* Y U3 Y V3 Y U3 Y V3*/ |
9394 | 2477 MOVNTQ" %%mm0, 16(%4, %0, 8)\n\t" |
2478 MOVNTQ" %%mm3, 24(%4, %0, 8)\n\t" | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2479 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2480 "movq %%mm4, %%mm6\n\t" |
9394 | 2481 "movq 16(%1, %0, 4), %%mm0\n\t" |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2482 "movq %%mm0, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2483 "punpcklbw %%mm5, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2484 "punpcklbw %%mm4, %%mm0\n\t" /* Y U4 Y V4 Y U4 Y V4*/ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2485 "punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/ |
9394 | 2486 MOVNTQ" %%mm0, 32(%4, %0, 8)\n\t" |
2487 MOVNTQ" %%mm3, 40(%4, %0, 8)\n\t" | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2488 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2489 "punpckhbw %%mm5, %%mm6\n\t" |
9394 | 2490 "movq 24(%1, %0, 4), %%mm0\n\t" |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2491 "movq %%mm0, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2492 "punpcklbw %%mm6, %%mm0\n\t" /* Y U6 Y V6 Y U6 Y V6*/ |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2493 "punpckhbw %%mm6, %%mm3\n\t" /* Y U7 Y V7 Y U7 Y V7*/ |
9394 | 2494 MOVNTQ" %%mm0, 48(%4, %0, 8)\n\t" |
2495 MOVNTQ" %%mm3, 56(%4, %0, 8)\n\t" | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2496 |
9394 | 2497 : "+r" (x) |
2498 : "r"(yp), "r" (up), "r"(vp), "r"(d) | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2499 :"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2500 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2501 #endif |
9394 | 2502 for(; x<w; x++) |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2503 { |
9394 | 2504 const int x2= x<<2; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2505 d[8*x+0]=yp[x2]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2506 d[8*x+1]=up[x]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2507 d[8*x+2]=yp[x2+1]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2508 d[8*x+3]=vp[x]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2509 d[8*x+4]=yp[x2+2]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2510 d[8*x+5]=up[x]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2511 d[8*x+6]=yp[x2+3]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2512 d[8*x+7]=vp[x]; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2513 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2514 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2515 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2516 asm( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2517 EMMS" \n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2518 SFENCE" \n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2519 ::: "memory" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2520 ); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2521 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2522 } |