Mercurial > mplayer.hg
annotate postproc/rgb2rgb_template.c @ 9046:13b7ad16f278
This patch should fix the display problem with 4bpp and 8bpp modes.
The problem was that the new drawing method assumes a linear
framebuffer, which is not available in those modes. This can be worked
around by using the old drawing method, which is what this patch does.
The old method can be forced, by using the "old" driver option.
This patch also enables linear addressing, since it improves write speed
to video memory considerably. The mentioned problem,
"it is not compatable with vga_draw* for some cards",
is a bug in svgalib, which I think has been fixed in recent svgalib
versions. If someone sees this problem, please report it to the svgalib
maintainer (that's me).
patch by Matan Ziv-Av. matan@svgalib.org
author | arpi |
---|---|
date | Mon, 20 Jan 2003 21:33:11 +0000 |
parents | 4d6369c408b5 |
children | 93e8d1655797 |
rev | line source |
---|---|
2694 | 1 /* |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
2 * |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
3 * rgb2rgb.c, Software RGB to RGB convertor |
2732 | 4 * pluralize by Software PAL8 to RGB convertor |
5 * Software YUV to YUV convertor | |
6 * Software YUV to RGB convertor | |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
7 * Written by Nick Kurshev. |
3132 | 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
9 */ |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
10 |
6492 | 11 #include <stddef.h> |
12 #include <inttypes.h> /* for __WORDSIZE */ | |
13 | |
14 #ifndef __WORDSIZE | |
7421
0684cad9b204
use detected WORDSIZE instead of warning, when inttypes.h doesn't define __WORDSIZE
arpi
parents:
6608
diff
changeset
|
15 // #warning You have misconfigured system and probably will lose performance! |
0684cad9b204
use detected WORDSIZE instead of warning, when inttypes.h doesn't define __WORDSIZE
arpi
parents:
6608
diff
changeset
|
16 #define __WORDSIZE MP_WORDSIZE |
6492 | 17 #endif |
18 | |
3132 | 19 #undef PREFETCH |
20 #undef MOVNTQ | |
21 #undef EMMS | |
22 #undef SFENCE | |
23 #undef MMREG_SIZE | |
24 #undef PREFETCHW | |
25 #undef PAVGB | |
2755 | 26 |
3132 | 27 #ifdef HAVE_SSE2 |
28 #define MMREG_SIZE 16 | |
29 #else | |
30 #define MMREG_SIZE 8 | |
2535 | 31 #endif |
2513 | 32 |
3132 | 33 #ifdef HAVE_3DNOW |
34 #define PREFETCH "prefetch" | |
35 #define PREFETCHW "prefetchw" | |
36 #define PAVGB "pavgusb" | |
37 #elif defined ( HAVE_MMX2 ) | |
38 #define PREFETCH "prefetchnta" | |
39 #define PREFETCHW "prefetcht0" | |
40 #define PAVGB "pavgb" | |
41 #else | |
42 #define PREFETCH "/nop" | |
43 #define PREFETCHW "/nop" | |
44 #endif | |
45 | |
46 #ifdef HAVE_3DNOW | |
47 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ | |
48 #define EMMS "femms" | |
49 #else | |
50 #define EMMS "emms" | |
51 #endif | |
52 | |
53 #ifdef HAVE_MMX2 | |
54 #define MOVNTQ "movntq" | |
55 #define SFENCE "sfence" | |
56 #else | |
57 #define MOVNTQ "movq" | |
58 #define SFENCE "/nop" | |
59 #endif | |
60 | |
/*
 * Expand packed 3-byte pixels into 4-byte pixels.
 *
 * src      - source buffer of 3-byte pixels
 * dst      - destination buffer; receives 4 bytes for every 3 source bytes
 * src_size - size of the source buffer in bytes
 *
 * The three source bytes are copied unchanged and a zero byte is appended.
 * The scalar loop always consumes whole 3-byte groups, so src_size is
 * expected to be a multiple of 3.
 */
static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
	const uint8_t *in = src;
	uint8_t *out = dst;
	const uint8_t *const end = src + src_size;
#ifdef HAVE_MMX
	/* The MMX loop runs only while at least 24 source bytes remain. */
	const uint8_t *const mm_end = end - 23;

	__asm __volatile(PREFETCH" %0"::"m"(*in):"memory");
	/* mask32 is defined elsewhere in the project; it is ANDed with every
	   assembled quadword (presumably to clear the padding byte). */
	__asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
	for(; in < mm_end; in += 24, out += 32)
	{
		/* 8 pixels per iteration: 24 bytes in, 32 bytes out. */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movd %1, %%mm0\n\t"
			"punpckldq 3%1, %%mm0\n\t"
			"movd 6%1, %%mm1\n\t"
			"punpckldq 9%1, %%mm1\n\t"
			"movd 12%1, %%mm2\n\t"
			"punpckldq 15%1, %%mm2\n\t"
			"movd 18%1, %%mm3\n\t"
			"punpckldq 21%1, %%mm3\n\t"
			"pand %%mm7, %%mm0\n\t"
			"pand %%mm7, %%mm1\n\t"
			"pand %%mm7, %%mm2\n\t"
			"pand %%mm7, %%mm3\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm1, 8%0\n\t"
			MOVNTQ" %%mm2, 16%0\n\t"
			MOVNTQ" %%mm3, 24%0"
			:"=m"(*out)
			:"m"(*in)
			:"memory");
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Scalar path: handles everything (no MMX) or the leftover pixels. */
	while(in < end)
	{
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = 0;
		in += 3;
		out += 4;
	}
}
2505 | 111 |
/*
 * Pack 4-byte pixels into 3-byte pixels.
 *
 * src      - source buffer of 4-byte pixels
 * dst      - destination buffer; receives 3 bytes for every 4 source bytes
 * src_size - size of the source buffer in bytes
 *
 * The first three bytes of every source pixel are copied and the fourth is
 * dropped. The scalar loop consumes whole 4-byte groups, so src_size is
 * expected to be a multiple of 4.
 */
static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
	const uint8_t *in = src;
	uint8_t *out = dst;
	const uint8_t *const end = src + src_size;
#ifdef HAVE_MMX
	/* The MMX loop runs only while at least 32 source bytes remain. */
	const uint8_t *const mm_end = end - 31;

	__asm __volatile(PREFETCH" %0"::"m"(*in):"memory");
	for(; in < mm_end; in += 32, out += 24)
	{
		/* 8 pixels per iteration: 32 bytes in, 24 bytes out.
		   The mask24* constants are defined elsewhere in the project. */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm1\n\t"
			"movq 16%1, %%mm4\n\t"
			"movq 24%1, %%mm5\n\t"
			"movq %%mm0, %%mm2\n\t"
			"movq %%mm1, %%mm3\n\t"
			"movq %%mm4, %%mm6\n\t"
			"movq %%mm5, %%mm7\n\t"
			"psrlq $8, %%mm2\n\t"
			"psrlq $8, %%mm3\n\t"
			"psrlq $8, %%mm6\n\t"
			"psrlq $8, %%mm7\n\t"
			"pand %2, %%mm0\n\t"
			"pand %2, %%mm1\n\t"
			"pand %2, %%mm4\n\t"
			"pand %2, %%mm5\n\t"
			"pand %3, %%mm2\n\t"
			"pand %3, %%mm3\n\t"
			"pand %3, %%mm6\n\t"
			"pand %3, %%mm7\n\t"
			"por %%mm2, %%mm0\n\t"
			"por %%mm3, %%mm1\n\t"
			"por %%mm6, %%mm4\n\t"
			"por %%mm7, %%mm5\n\t"

			"movq %%mm1, %%mm2\n\t"
			"movq %%mm4, %%mm3\n\t"
			"psllq $48, %%mm2\n\t"
			"psllq $32, %%mm3\n\t"
			"pand %4, %%mm2\n\t"
			"pand %5, %%mm3\n\t"
			"por %%mm2, %%mm0\n\t"
			"psrlq $16, %%mm1\n\t"
			"psrlq $32, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm3, %%mm1\n\t"
			"pand %6, %%mm5\n\t"
			"por %%mm5, %%mm4\n\t"

			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm1, 8%0\n\t"
			MOVNTQ" %%mm4, 16%0"
			:"=m"(*out)
			:"m"(*in),"m"(mask24l),
			 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
			:"memory");
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Scalar path: handles everything (no MMX) or the leftover pixels. */
	while(in < end)
	{
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out += 3;
		in += 4;	/* the fourth source byte is skipped */
	}
}
2506 | 188 |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
189 /* |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
190 Original by Strepto/Astral |
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
191 ported to gcc & bugfixed : A'rpi |
2564 | 192 MMX2, 3DNOW optimization by Nick Kurshev |
2698
22652c028692
faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents:
2697
diff
changeset
|
193 32bit c version, and and&add trick by Michael Niedermayer |
2538
71320898b333
Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents:
2535
diff
changeset
|
194 */ |
/*
 * Convert 16-bit words from a 15-bit layout to a 16-bit layout.
 *
 * src      - source buffer of 16-bit pixels
 * dst      - destination buffer, same size as the source
 * src_size - size of the source buffer in bytes
 *
 * For every 16-bit word w the result is (w & 0x7FFF) + (w & 0x7FE0):
 * adding bits 5..14 to themselves moves them up by one bit while the low
 * five bits stay where they are. Buffers are accessed as 16/32-bit words,
 * so the platform must tolerate such accesses at the given addresses.
 */
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
	const uint8_t *in = src;
	uint8_t *out = dst;
	const uint8_t *const end = src + src_size;
	const uint8_t *dword_end;
#ifdef HAVE_MMX
	/* The MMX loop runs only while at least 16 source bytes remain. */
	const uint8_t *const mm_end = end - 15;

	__asm __volatile(PREFETCH" %0"::"m"(*in));
	/* mask15s is defined elsewhere in the project. */
	__asm __volatile("movq %0, %%mm4"::"m"(mask15s));
	for(; in < mm_end; in += 16, out += 16)
	{
		/* 8 pixels per iteration, same and-then-add trick as below. */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm2\n\t"
			"movq %%mm0, %%mm1\n\t"
			"movq %%mm2, %%mm3\n\t"
			"pand %%mm4, %%mm0\n\t"
			"pand %%mm4, %%mm2\n\t"
			"paddw %%mm1, %%mm0\n\t"
			"paddw %%mm3, %%mm2\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm2, 8%0"
			:"=m"(*out)
			:"m"(*in)
			);
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Two pixels at a time while at least 4 bytes remain. */
	dword_end = end - 3;
	for(; in < dword_end; in += 4, out += 4)
	{
		const unsigned pair = *(const uint32_t *)in;
		*(uint32_t *)out = (pair & 0x7FFF7FFF) + (pair & 0x7FE07FE0);
	}
	/* One trailing pixel, if any. */
	if(in < end)
	{
		const unsigned short px = *(const uint16_t *)in;
		*(uint16_t *)out = (px & 0x7FFF) + (px & 0x7FE0);
	}
}
2694 | 243 |
6484
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
/*
 * Swap the first and third byte of every 3-byte pixel (BGR <-> RGB).
 *
 * src      - source buffer of 3-byte pixels
 * dst      - destination buffer, same size as the source
 * src_size - size of the source buffer in bytes; trailing bytes that do not
 *            form a whole pixel are ignored
 *
 * The loop counts pixels, not bytes, so the whole buffer is converted.
 * All three bytes of a pixel are loaded before any is stored, which makes
 * the conversion safe when src and dst are the same buffer.
 */
static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const unsigned num_pixels = src_size / 3;
	unsigned pixel;

	for(pixel = 0; pixel < num_pixels; pixel++)
	{
		const uint8_t first  = src[0];
		const uint8_t second = src[1];
		const uint8_t third  = src[2];

		dst[0] = third;
		dst[1] = second;
		dst[2] = first;

		src += 3;
		dst += 3;
	}
}
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
254 |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
/*
 * Convert 16-bit words from a 16-bit layout to a 15-bit layout.
 *
 * src      - source buffer of 16-bit pixels
 * dst      - destination buffer, same size as the source
 * src_size - size of the source buffer in bytes
 *
 * For every 16-bit word w the result is ((w >> 1) & 0x7FE0) | (w & 0x001F):
 * the upper bits move down by one (dropping bit 5 of the source) and the
 * low five bits are kept. Buffers are accessed as 16/32-bit words, so the
 * platform must tolerate such accesses at the given addresses.
 */
static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
	const uint8_t *in = src;
	uint8_t *out = dst;
	const uint8_t *const end = src + src_size;
	const uint8_t *dword_end;
#ifdef HAVE_MMX
	/* The MMX loop runs only while at least 16 source bytes remain. */
	const uint8_t *const mm_end = end - 15;

	__asm __volatile(PREFETCH" %0"::"m"(*in));
	/* mask15rg and mask15b are defined elsewhere in the project. */
	__asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
	__asm __volatile("movq %0, %%mm6"::"m"(mask15b));
	for(; in < mm_end; in += 16, out += 16)
	{
		/* 8 pixels per iteration: shift-and-mask, then merge. */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm2\n\t"
			"movq %%mm0, %%mm1\n\t"
			"movq %%mm2, %%mm3\n\t"
			"psrlq $1, %%mm0\n\t"
			"psrlq $1, %%mm2\n\t"
			"pand %%mm7, %%mm0\n\t"
			"pand %%mm7, %%mm2\n\t"
			"pand %%mm6, %%mm1\n\t"
			"pand %%mm6, %%mm3\n\t"
			"por %%mm1, %%mm0\n\t"
			"por %%mm3, %%mm2\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm2, 8%0"
			:"=m"(*out)
			:"m"(*in)
			);
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Two pixels at a time while at least 4 bytes remain. */
	dword_end = end - 3;
	for(; in < dword_end; in += 4, out += 4)
	{
		const uint32_t pair = *(const uint32_t *)in;
		*(uint32_t *)out = ((pair >> 1) & 0x7FE07FE0) | (pair & 0x001F001F);
	}
	/* One trailing pixel, if any. */
	if(in < end)
	{
		const uint16_t px = *(const uint16_t *)in;
		*(uint16_t *)out = ((px >> 1) & 0x7FE0) | (px & 0x001F);
	}
}
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
310 |
/*
 * Pack 4-byte pixels into 16-bit 5-6-5 pixels.
 *
 * src      - source buffer of 4-byte pixels
 * dst      - destination buffer; receives one 16-bit word per source pixel
 * src_size - size of the source buffer in bytes
 *
 * Each output word is (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8).
 * Without WORDS_BIGENDIAN the source bytes are taken as b, g, r, <unused>;
 * with WORDS_BIGENDIAN they are taken as <unused>, r, g, b.
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint8_t *in = src;
	uint16_t *out = (uint16_t *)dst;
	const uint8_t *const end = src + src_size;
#ifdef HAVE_MMX
	/* The MMX loop runs only while at least 16 source bytes remain. */
	const uint8_t *const mm_end = end - 15;

	__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
	/* The *_16mask constants are defined elsewhere in the project. */
	__asm __volatile(
	    "movq %0, %%mm7\n\t"
	    "movq %1, %%mm6\n\t"
	    ::"m"(red_16mask),"m"(green_16mask));
	for(; in < mm_end; in += 16, out += 4)
	{
		/* 4 pixels per iteration: 16 bytes in, 8 bytes out. */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movd %1, %%mm0\n\t"
			"movd 4%1, %%mm3\n\t"
			"punpckldq 8%1, %%mm0\n\t"
			"punpckldq 12%1, %%mm3\n\t"
			"movq %%mm0, %%mm1\n\t"
			"movq %%mm0, %%mm2\n\t"
			"movq %%mm3, %%mm4\n\t"
			"movq %%mm3, %%mm5\n\t"
			"psrlq $3, %%mm0\n\t"
			"psrlq $3, %%mm3\n\t"
			"pand %2, %%mm0\n\t"
			"pand %2, %%mm3\n\t"
			"psrlq $5, %%mm1\n\t"
			"psrlq $5, %%mm4\n\t"
			"pand %%mm6, %%mm1\n\t"
			"pand %%mm6, %%mm4\n\t"
			"psrlq $8, %%mm2\n\t"
			"psrlq $8, %%mm5\n\t"
			"pand %%mm7, %%mm2\n\t"
			"pand %%mm7, %%mm5\n\t"
			"por %%mm1, %%mm0\n\t"
			"por %%mm4, %%mm3\n\t"
			"por %%mm2, %%mm0\n\t"
			"por %%mm5, %%mm3\n\t"
			"psllq $16, %%mm3\n\t"
			"por %%mm3, %%mm0\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			:"=m"(*out):"m"(*in),"m"(blue_16mask):"memory");
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Scalar path: handles everything (no MMX) or the leftover pixels. */
	for(; in < end; in += 4)
	{
#ifndef WORDS_BIGENDIAN
		const int b = in[0];
		const int g = in[1];
		const int r = in[2];
#else
		/* in[0] is the unused byte on big-endian layouts */
		const int r = in[1];
		const int g = in[2];
		const int b = in[3];
#endif
		*out++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
	}
}
383 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
384 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
385 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
386 const uint8_t *s = src; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
387 const uint8_t *end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
388 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
389 const uint8_t *mm_end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
390 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
391 uint16_t *d = (uint16_t *)dst; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
392 end = s + src_size; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
393 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
394 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
395 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
396 "movq %0, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
397 "movq %1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
398 ::"m"(red_16mask),"m"(green_16mask)); |
6608
da27a1bc1763
fixing memory overwrite bugs in the new converters
michael
parents:
6606
diff
changeset
|
399 mm_end = end - 15; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
400 while(s < mm_end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
401 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
402 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
403 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
404 "movd %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
405 "movd 4%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
406 "punpckldq 8%1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
407 "punpckldq 12%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
408 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
409 "movq %%mm0, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
410 "movq %%mm3, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
411 "movq %%mm3, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
412 "psllq $8, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
413 "psllq $8, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
414 "pand %%mm7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
415 "pand %%mm7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
416 "psrlq $5, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
417 "psrlq $5, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
418 "pand %%mm6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
419 "pand %%mm6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
420 "psrlq $19, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
421 "psrlq $19, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
422 "pand %2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
423 "pand %2, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
424 "por %%mm1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
425 "por %%mm4, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
426 "por %%mm2, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
427 "por %%mm5, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
428 "psllq $16, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
429 "por %%mm3, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
430 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
431 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
432 d += 4; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
433 s += 16; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
434 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
435 __asm __volatile(SFENCE:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
436 __asm __volatile(EMMS:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
437 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
438 while(s < end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
439 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
440 const int r= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
441 const int g= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
442 const int b= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
443 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
444 s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
445 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
446 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
447 |
3132 | 448 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
2694 | 449 { |
6492 | 450 const uint8_t *s = src; |
451 const uint8_t *end; | |
2741 | 452 #ifdef HAVE_MMX |
6492 | 453 const uint8_t *mm_end; |
454 #endif | |
2741 | 455 uint16_t *d = (uint16_t *)dst; |
456 end = s + src_size; | |
6492 | 457 #ifdef HAVE_MMX |
2741 | 458 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
459 __asm __volatile( | |
460 "movq %0, %%mm7\n\t" | |
461 "movq %1, %%mm6\n\t" | |
462 ::"m"(red_15mask),"m"(green_15mask)); | |
6605 | 463 mm_end = end - 15; |
2741 | 464 while(s < mm_end) |
465 { | |
466 __asm __volatile( | |
467 PREFETCH" 32%1\n\t" | |
468 "movd %1, %%mm0\n\t" | |
469 "movd 4%1, %%mm3\n\t" | |
470 "punpckldq 8%1, %%mm0\n\t" | |
471 "punpckldq 12%1, %%mm3\n\t" | |
472 "movq %%mm0, %%mm1\n\t" | |
473 "movq %%mm0, %%mm2\n\t" | |
474 "movq %%mm3, %%mm4\n\t" | |
475 "movq %%mm3, %%mm5\n\t" | |
476 "psrlq $3, %%mm0\n\t" | |
477 "psrlq $3, %%mm3\n\t" | |
478 "pand %2, %%mm0\n\t" | |
479 "pand %2, %%mm3\n\t" | |
480 "psrlq $6, %%mm1\n\t" | |
481 "psrlq $6, %%mm4\n\t" | |
482 "pand %%mm6, %%mm1\n\t" | |
483 "pand %%mm6, %%mm4\n\t" | |
484 "psrlq $9, %%mm2\n\t" | |
485 "psrlq $9, %%mm5\n\t" | |
486 "pand %%mm7, %%mm2\n\t" | |
487 "pand %%mm7, %%mm5\n\t" | |
488 "por %%mm1, %%mm0\n\t" | |
489 "por %%mm4, %%mm3\n\t" | |
490 "por %%mm2, %%mm0\n\t" | |
491 "por %%mm5, %%mm3\n\t" | |
492 "psllq $16, %%mm3\n\t" | |
493 "por %%mm3, %%mm0\n\t" | |
494 MOVNTQ" %%mm0, %0\n\t" | |
495 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
496 d += 4; | |
497 s += 16; | |
498 } | |
6492 | 499 __asm __volatile(SFENCE:::"memory"); |
500 __asm __volatile(EMMS:::"memory"); | |
501 #endif | |
2741 | 502 while(s < end) |
503 { | |
504 const int b= *s++; | |
505 const int g= *s++; | |
506 const int r= *s++; | |
6492 | 507 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
6096 | 508 s++; |
2741 | 509 } |
2694 | 510 } |
511 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
512 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
513 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
514 const uint8_t *s = src; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
515 const uint8_t *end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
516 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
517 const uint8_t *mm_end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
518 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
519 uint16_t *d = (uint16_t *)dst; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
520 end = s + src_size; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
521 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
522 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
523 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
524 "movq %0, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
525 "movq %1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
526 ::"m"(red_15mask),"m"(green_15mask)); |
6608
da27a1bc1763
fixing memory overwrite bugs in the new converters
michael
parents:
6606
diff
changeset
|
527 mm_end = end - 15; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
528 while(s < mm_end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
529 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
530 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
531 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
532 "movd %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
533 "movd 4%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
534 "punpckldq 8%1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
535 "punpckldq 12%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
536 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
537 "movq %%mm0, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
538 "movq %%mm3, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
539 "movq %%mm3, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
540 "psllq $7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
541 "psllq $7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
542 "pand %%mm7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
543 "pand %%mm7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
544 "psrlq $6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
545 "psrlq $6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
546 "pand %%mm6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
547 "pand %%mm6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
548 "psrlq $19, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
549 "psrlq $19, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
550 "pand %2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
551 "pand %2, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
552 "por %%mm1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
553 "por %%mm4, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
554 "por %%mm2, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
555 "por %%mm5, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
556 "psllq $16, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
557 "por %%mm3, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
558 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
559 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
560 d += 4; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
561 s += 16; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
562 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
563 __asm __volatile(SFENCE:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
564 __asm __volatile(EMMS:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
565 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
566 while(s < end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
567 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
568 const int r= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
569 const int g= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
570 const int b= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
571 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
572 s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
573 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
574 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
575 |
3132 | 576 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
2718 | 577 { |
6492 | 578 const uint8_t *s = src; |
579 const uint8_t *end; | |
2740 | 580 #ifdef HAVE_MMX |
6492 | 581 const uint8_t *mm_end; |
582 #endif | |
2719
fafa73d6d80c
Fixed rgb32(24)to16 stuff, rgb32(24)to15 is still broken
nick
parents:
2718
diff
changeset
|
583 uint16_t *d = (uint16_t *)dst; |
2740 | 584 end = s + src_size; |
6492 | 585 #ifdef HAVE_MMX |
2738 | 586 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
587 __asm __volatile( | |
588 "movq %0, %%mm7\n\t" | |
589 "movq %1, %%mm6\n\t" | |
2741 | 590 ::"m"(red_16mask),"m"(green_16mask)); |
6605 | 591 mm_end = end - 11; |
2740 | 592 while(s < mm_end) |
2738 | 593 { |
594 __asm __volatile( | |
595 PREFETCH" 32%1\n\t" | |
596 "movd %1, %%mm0\n\t" | |
2740 | 597 "movd 3%1, %%mm3\n\t" |
598 "punpckldq 6%1, %%mm0\n\t" | |
2738 | 599 "punpckldq 9%1, %%mm3\n\t" |
600 "movq %%mm0, %%mm1\n\t" | |
601 "movq %%mm0, %%mm2\n\t" | |
602 "movq %%mm3, %%mm4\n\t" | |
603 "movq %%mm3, %%mm5\n\t" | |
604 "psrlq $3, %%mm0\n\t" | |
605 "psrlq $3, %%mm3\n\t" | |
2740 | 606 "pand %2, %%mm0\n\t" |
607 "pand %2, %%mm3\n\t" | |
608 "psrlq $5, %%mm1\n\t" | |
609 "psrlq $5, %%mm4\n\t" | |
610 "pand %%mm6, %%mm1\n\t" | |
611 "pand %%mm6, %%mm4\n\t" | |
612 "psrlq $8, %%mm2\n\t" | |
613 "psrlq $8, %%mm5\n\t" | |
614 "pand %%mm7, %%mm2\n\t" | |
615 "pand %%mm7, %%mm5\n\t" | |
2738 | 616 "por %%mm1, %%mm0\n\t" |
2740 | 617 "por %%mm4, %%mm3\n\t" |
2738 | 618 "por %%mm2, %%mm0\n\t" |
619 "por %%mm5, %%mm3\n\t" | |
2740 | 620 "psllq $16, %%mm3\n\t" |
621 "por %%mm3, %%mm0\n\t" | |
2738 | 622 MOVNTQ" %%mm0, %0\n\t" |
2741 | 623 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
2740 | 624 d += 4; |
625 s += 12; | |
2738 | 626 } |
6492 | 627 __asm __volatile(SFENCE:::"memory"); |
628 __asm __volatile(EMMS:::"memory"); | |
629 #endif | |
2740 | 630 while(s < end) |
631 { | |
632 const int b= *s++; | |
633 const int g= *s++; | |
634 const int r= *s++; | |
635 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |
636 } | |
2718 | 637 } |
638 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
639 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
640 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
641 const uint8_t *s = src; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
642 const uint8_t *end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
643 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
644 const uint8_t *mm_end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
645 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
646 uint16_t *d = (uint16_t *)dst; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
647 end = s + src_size; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
648 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
649 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
650 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
651 "movq %0, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
652 "movq %1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
653 ::"m"(red_16mask),"m"(green_16mask)); |
6608
da27a1bc1763
fixing memory overwrite bugs in the new converters
michael
parents:
6606
diff
changeset
|
654 mm_end = end - 15; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
655 while(s < mm_end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
656 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
657 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
658 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
659 "movd %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
660 "movd 3%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
661 "punpckldq 6%1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
662 "punpckldq 9%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
663 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
664 "movq %%mm0, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
665 "movq %%mm3, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
666 "movq %%mm3, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
667 "psllq $8, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
668 "psllq $8, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
669 "pand %%mm7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
670 "pand %%mm7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
671 "psrlq $5, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
672 "psrlq $5, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
673 "pand %%mm6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
674 "pand %%mm6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
675 "psrlq $19, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
676 "psrlq $19, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
677 "pand %2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
678 "pand %2, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
679 "por %%mm1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
680 "por %%mm4, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
681 "por %%mm2, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
682 "por %%mm5, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
683 "psllq $16, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
684 "por %%mm3, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
685 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
686 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
687 d += 4; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
688 s += 12; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
689 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
690 __asm __volatile(SFENCE:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
691 __asm __volatile(EMMS:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
692 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
693 while(s < end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
694 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
695 const int r= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
696 const int g= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
697 const int b= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
698 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
699 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
700 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
701 |
3132 | 702 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
2718 | 703 { |
6492 | 704 const uint8_t *s = src; |
705 const uint8_t *end; | |
2741 | 706 #ifdef HAVE_MMX |
6492 | 707 const uint8_t *mm_end; |
708 #endif | |
2741 | 709 uint16_t *d = (uint16_t *)dst; |
710 end = s + src_size; | |
6492 | 711 #ifdef HAVE_MMX |
2741 | 712 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
713 __asm __volatile( | |
714 "movq %0, %%mm7\n\t" | |
715 "movq %1, %%mm6\n\t" | |
716 ::"m"(red_15mask),"m"(green_15mask)); | |
6605 | 717 mm_end = end - 11; |
2741 | 718 while(s < mm_end) |
719 { | |
720 __asm __volatile( | |
721 PREFETCH" 32%1\n\t" | |
722 "movd %1, %%mm0\n\t" | |
723 "movd 3%1, %%mm3\n\t" | |
724 "punpckldq 6%1, %%mm0\n\t" | |
725 "punpckldq 9%1, %%mm3\n\t" | |
726 "movq %%mm0, %%mm1\n\t" | |
727 "movq %%mm0, %%mm2\n\t" | |
728 "movq %%mm3, %%mm4\n\t" | |
729 "movq %%mm3, %%mm5\n\t" | |
730 "psrlq $3, %%mm0\n\t" | |
731 "psrlq $3, %%mm3\n\t" | |
732 "pand %2, %%mm0\n\t" | |
733 "pand %2, %%mm3\n\t" | |
734 "psrlq $6, %%mm1\n\t" | |
735 "psrlq $6, %%mm4\n\t" | |
736 "pand %%mm6, %%mm1\n\t" | |
737 "pand %%mm6, %%mm4\n\t" | |
738 "psrlq $9, %%mm2\n\t" | |
739 "psrlq $9, %%mm5\n\t" | |
740 "pand %%mm7, %%mm2\n\t" | |
741 "pand %%mm7, %%mm5\n\t" | |
742 "por %%mm1, %%mm0\n\t" | |
743 "por %%mm4, %%mm3\n\t" | |
744 "por %%mm2, %%mm0\n\t" | |
745 "por %%mm5, %%mm3\n\t" | |
746 "psllq $16, %%mm3\n\t" | |
747 "por %%mm3, %%mm0\n\t" | |
748 MOVNTQ" %%mm0, %0\n\t" | |
749 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
750 d += 4; | |
751 s += 12; | |
752 } | |
6492 | 753 __asm __volatile(SFENCE:::"memory"); |
754 __asm __volatile(EMMS:::"memory"); | |
755 #endif | |
2741 | 756 while(s < end) |
757 { | |
758 const int b= *s++; | |
759 const int g= *s++; | |
760 const int r= *s++; | |
761 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |
762 } | |
6492 | 763 } |
764 | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
765 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
766 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
767 const uint8_t *s = src; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
768 const uint8_t *end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
769 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
770 const uint8_t *mm_end; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
771 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
772 uint16_t *d = (uint16_t *)dst; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
773 end = s + src_size; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
774 #ifdef HAVE_MMX |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
775 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
776 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
777 "movq %0, %%mm7\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
778 "movq %1, %%mm6\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
779 ::"m"(red_15mask),"m"(green_15mask)); |
6608
da27a1bc1763
fixing memory overwrite bugs in the new converters
michael
parents:
6606
diff
changeset
|
780 mm_end = end - 15; |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
781 while(s < mm_end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
782 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
783 __asm __volatile( |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
784 PREFETCH" 32%1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
785 "movd %1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
786 "movd 3%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
787 "punpckldq 6%1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
788 "punpckldq 9%1, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
789 "movq %%mm0, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
790 "movq %%mm0, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
791 "movq %%mm3, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
792 "movq %%mm3, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
793 "psllq $7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
794 "psllq $7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
795 "pand %%mm7, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
796 "pand %%mm7, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
797 "psrlq $6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
798 "psrlq $6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
799 "pand %%mm6, %%mm1\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
800 "pand %%mm6, %%mm4\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
801 "psrlq $19, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
802 "psrlq $19, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
803 "pand %2, %%mm2\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
804 "pand %2, %%mm5\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
805 "por %%mm1, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
806 "por %%mm4, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
807 "por %%mm2, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
808 "por %%mm5, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
809 "psllq $16, %%mm3\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
810 "por %%mm3, %%mm0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
811 MOVNTQ" %%mm0, %0\n\t" |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
812 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
813 d += 4; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
814 s += 12; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
815 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
816 __asm __volatile(SFENCE:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
817 __asm __volatile(EMMS:::"memory"); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
818 #endif |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
819 while(s < end) |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
820 { |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
821 const int r= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
822 const int g= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
823 const int b= *s++; |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
824 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
825 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
826 } |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
827 |
6492 | 828 /* |
829 I use here less accurate approximation by simply | |
830 left-shifting the input | |
831 value and filling the low order bits with | |
832 zeroes. This method improves png's | |
833 compression but this scheme cannot reproduce white exactly, since it does not | |
834 generate an all-ones maximum value; the net effect is to darken the | |
835 image slightly. | |
836 | |
837 The better method should be "left bit replication": | |
838 | |
839 4 3 2 1 0 | |
840 --------- | |
841 1 1 0 1 1 | |
842 | |
843 7 6 5 4 3 2 1 0 | |
844 ---------------- | |
845 1 1 0 1 1 1 1 0 | |
846 |=======| |===| | |
847 | Leftmost Bits Repeated to Fill Open Bits | |
848 | | |
849 Original Bits | |
850 */ | |
851 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
852 { | |
853 const uint16_t *end; | |
854 #ifdef HAVE_MMX | |
855 const uint16_t *mm_end; | |
856 #endif | |
857 uint8_t *d = (uint8_t *)dst; | |
858 const uint16_t *s = (uint16_t *)src; | |
859 end = s + src_size/2; | |
860 #ifdef HAVE_MMX | |
861 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
6605 | 862 mm_end = end - 7; |
6492 | 863 while(s < mm_end) |
864 { | |
865 __asm __volatile( | |
866 PREFETCH" 32%1\n\t" | |
867 "movq %1, %%mm0\n\t" | |
868 "movq %1, %%mm1\n\t" | |
869 "movq %1, %%mm2\n\t" | |
870 "pand %2, %%mm0\n\t" | |
871 "pand %3, %%mm1\n\t" | |
872 "pand %4, %%mm2\n\t" | |
873 "psllq $3, %%mm0\n\t" | |
874 "psrlq $2, %%mm1\n\t" | |
875 "psrlq $7, %%mm2\n\t" | |
876 "movq %%mm0, %%mm3\n\t" | |
877 "movq %%mm1, %%mm4\n\t" | |
878 "movq %%mm2, %%mm5\n\t" | |
879 "punpcklwd %5, %%mm0\n\t" | |
880 "punpcklwd %5, %%mm1\n\t" | |
881 "punpcklwd %5, %%mm2\n\t" | |
882 "punpckhwd %5, %%mm3\n\t" | |
883 "punpckhwd %5, %%mm4\n\t" | |
884 "punpckhwd %5, %%mm5\n\t" | |
885 "psllq $8, %%mm1\n\t" | |
886 "psllq $16, %%mm2\n\t" | |
887 "por %%mm1, %%mm0\n\t" | |
888 "por %%mm2, %%mm0\n\t" | |
889 "psllq $8, %%mm4\n\t" | |
890 "psllq $16, %%mm5\n\t" | |
891 "por %%mm4, %%mm3\n\t" | |
892 "por %%mm5, %%mm3\n\t" | |
893 | |
894 "movq %%mm0, %%mm6\n\t" | |
895 "movq %%mm3, %%mm7\n\t" | |
896 | |
897 "movq 8%1, %%mm0\n\t" | |
898 "movq 8%1, %%mm1\n\t" | |
899 "movq 8%1, %%mm2\n\t" | |
900 "pand %2, %%mm0\n\t" | |
901 "pand %3, %%mm1\n\t" | |
902 "pand %4, %%mm2\n\t" | |
903 "psllq $3, %%mm0\n\t" | |
904 "psrlq $2, %%mm1\n\t" | |
905 "psrlq $7, %%mm2\n\t" | |
906 "movq %%mm0, %%mm3\n\t" | |
907 "movq %%mm1, %%mm4\n\t" | |
908 "movq %%mm2, %%mm5\n\t" | |
909 "punpcklwd %5, %%mm0\n\t" | |
910 "punpcklwd %5, %%mm1\n\t" | |
911 "punpcklwd %5, %%mm2\n\t" | |
912 "punpckhwd %5, %%mm3\n\t" | |
913 "punpckhwd %5, %%mm4\n\t" | |
914 "punpckhwd %5, %%mm5\n\t" | |
915 "psllq $8, %%mm1\n\t" | |
916 "psllq $16, %%mm2\n\t" | |
917 "por %%mm1, %%mm0\n\t" | |
918 "por %%mm2, %%mm0\n\t" | |
919 "psllq $8, %%mm4\n\t" | |
920 "psllq $16, %%mm5\n\t" | |
921 "por %%mm4, %%mm3\n\t" | |
922 "por %%mm5, %%mm3\n\t" | |
923 | |
924 :"=m"(*d) | |
925 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |
926 :"memory"); | |
927 /* Borrowed 32 to 24 */ | |
928 __asm __volatile( | |
929 "movq %%mm0, %%mm4\n\t" | |
930 "movq %%mm3, %%mm5\n\t" | |
931 "movq %%mm6, %%mm0\n\t" | |
932 "movq %%mm7, %%mm1\n\t" | |
933 | |
934 "movq %%mm4, %%mm6\n\t" | |
935 "movq %%mm5, %%mm7\n\t" | |
936 "movq %%mm0, %%mm2\n\t" | |
937 "movq %%mm1, %%mm3\n\t" | |
938 | |
939 "psrlq $8, %%mm2\n\t" | |
940 "psrlq $8, %%mm3\n\t" | |
941 "psrlq $8, %%mm6\n\t" | |
942 "psrlq $8, %%mm7\n\t" | |
943 "pand %2, %%mm0\n\t" | |
944 "pand %2, %%mm1\n\t" | |
945 "pand %2, %%mm4\n\t" | |
946 "pand %2, %%mm5\n\t" | |
947 "pand %3, %%mm2\n\t" | |
948 "pand %3, %%mm3\n\t" | |
949 "pand %3, %%mm6\n\t" | |
950 "pand %3, %%mm7\n\t" | |
951 "por %%mm2, %%mm0\n\t" | |
952 "por %%mm3, %%mm1\n\t" | |
953 "por %%mm6, %%mm4\n\t" | |
954 "por %%mm7, %%mm5\n\t" | |
955 | |
956 "movq %%mm1, %%mm2\n\t" | |
957 "movq %%mm4, %%mm3\n\t" | |
958 "psllq $48, %%mm2\n\t" | |
959 "psllq $32, %%mm3\n\t" | |
960 "pand %4, %%mm2\n\t" | |
961 "pand %5, %%mm3\n\t" | |
962 "por %%mm2, %%mm0\n\t" | |
963 "psrlq $16, %%mm1\n\t" | |
964 "psrlq $32, %%mm4\n\t" | |
965 "psllq $16, %%mm5\n\t" | |
966 "por %%mm3, %%mm1\n\t" | |
967 "pand %6, %%mm5\n\t" | |
968 "por %%mm5, %%mm4\n\t" | |
969 | |
970 MOVNTQ" %%mm0, %0\n\t" | |
971 MOVNTQ" %%mm1, 8%0\n\t" | |
972 MOVNTQ" %%mm4, 16%0" | |
973 | |
974 :"=m"(*d) | |
975 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
976 :"memory"); | |
977 d += 24; | |
978 s += 8; | |
979 } | |
2741 | 980 __asm __volatile(SFENCE:::"memory"); |
981 __asm __volatile(EMMS:::"memory"); | |
6492 | 982 #endif |
983 while(s < end) | |
984 { | |
985 register uint16_t bgr; | |
986 bgr = *s++; | |
987 *d++ = (bgr&0x1F)<<3; | |
988 *d++ = (bgr&0x3E0)>>2; | |
989 *d++ = (bgr&0x7C00)>>7; | |
990 } | |
991 } | |
992 | |
993 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
994 { | |
995 const uint16_t *end; | |
996 #ifdef HAVE_MMX | |
997 const uint16_t *mm_end; | |
998 #endif | |
999 uint8_t *d = (uint8_t *)dst; | |
1000 const uint16_t *s = (const uint16_t *)src; | |
1001 end = s + src_size/2; | |
1002 #ifdef HAVE_MMX | |
1003 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
6605 | 1004 mm_end = end - 7; |
6492 | 1005 while(s < mm_end) |
2718 | 1006 { |
6492 | 1007 __asm __volatile( |
1008 PREFETCH" 32%1\n\t" | |
1009 "movq %1, %%mm0\n\t" | |
1010 "movq %1, %%mm1\n\t" | |
1011 "movq %1, %%mm2\n\t" | |
1012 "pand %2, %%mm0\n\t" | |
1013 "pand %3, %%mm1\n\t" | |
1014 "pand %4, %%mm2\n\t" | |
1015 "psllq $3, %%mm0\n\t" | |
1016 "psrlq $3, %%mm1\n\t" | |
1017 "psrlq $8, %%mm2\n\t" | |
1018 "movq %%mm0, %%mm3\n\t" | |
1019 "movq %%mm1, %%mm4\n\t" | |
1020 "movq %%mm2, %%mm5\n\t" | |
1021 "punpcklwd %5, %%mm0\n\t" | |
1022 "punpcklwd %5, %%mm1\n\t" | |
1023 "punpcklwd %5, %%mm2\n\t" | |
1024 "punpckhwd %5, %%mm3\n\t" | |
1025 "punpckhwd %5, %%mm4\n\t" | |
1026 "punpckhwd %5, %%mm5\n\t" | |
1027 "psllq $8, %%mm1\n\t" | |
1028 "psllq $16, %%mm2\n\t" | |
1029 "por %%mm1, %%mm0\n\t" | |
1030 "por %%mm2, %%mm0\n\t" | |
1031 "psllq $8, %%mm4\n\t" | |
1032 "psllq $16, %%mm5\n\t" | |
1033 "por %%mm4, %%mm3\n\t" | |
1034 "por %%mm5, %%mm3\n\t" | |
1035 | |
1036 "movq %%mm0, %%mm6\n\t" | |
1037 "movq %%mm3, %%mm7\n\t" | |
1038 | |
1039 "movq 8%1, %%mm0\n\t" | |
1040 "movq 8%1, %%mm1\n\t" | |
1041 "movq 8%1, %%mm2\n\t" | |
1042 "pand %2, %%mm0\n\t" | |
1043 "pand %3, %%mm1\n\t" | |
1044 "pand %4, %%mm2\n\t" | |
1045 "psllq $3, %%mm0\n\t" | |
1046 "psrlq $3, %%mm1\n\t" | |
1047 "psrlq $8, %%mm2\n\t" | |
1048 "movq %%mm0, %%mm3\n\t" | |
1049 "movq %%mm1, %%mm4\n\t" | |
1050 "movq %%mm2, %%mm5\n\t" | |
1051 "punpcklwd %5, %%mm0\n\t" | |
1052 "punpcklwd %5, %%mm1\n\t" | |
1053 "punpcklwd %5, %%mm2\n\t" | |
1054 "punpckhwd %5, %%mm3\n\t" | |
1055 "punpckhwd %5, %%mm4\n\t" | |
1056 "punpckhwd %5, %%mm5\n\t" | |
1057 "psllq $8, %%mm1\n\t" | |
1058 "psllq $16, %%mm2\n\t" | |
1059 "por %%mm1, %%mm0\n\t" | |
1060 "por %%mm2, %%mm0\n\t" | |
1061 "psllq $8, %%mm4\n\t" | |
1062 "psllq $16, %%mm5\n\t" | |
1063 "por %%mm4, %%mm3\n\t" | |
1064 "por %%mm5, %%mm3\n\t" | |
1065 :"=m"(*d) | |
1066 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |
1067 :"memory"); | |
1068 /* Borrowed 32 to 24 */ | |
1069 __asm __volatile( | |
1070 "movq %%mm0, %%mm4\n\t" | |
1071 "movq %%mm3, %%mm5\n\t" | |
1072 "movq %%mm6, %%mm0\n\t" | |
1073 "movq %%mm7, %%mm1\n\t" | |
1074 | |
1075 "movq %%mm4, %%mm6\n\t" | |
1076 "movq %%mm5, %%mm7\n\t" | |
1077 "movq %%mm0, %%mm2\n\t" | |
1078 "movq %%mm1, %%mm3\n\t" | |
1079 | |
1080 "psrlq $8, %%mm2\n\t" | |
1081 "psrlq $8, %%mm3\n\t" | |
1082 "psrlq $8, %%mm6\n\t" | |
1083 "psrlq $8, %%mm7\n\t" | |
1084 "pand %2, %%mm0\n\t" | |
1085 "pand %2, %%mm1\n\t" | |
1086 "pand %2, %%mm4\n\t" | |
1087 "pand %2, %%mm5\n\t" | |
1088 "pand %3, %%mm2\n\t" | |
1089 "pand %3, %%mm3\n\t" | |
1090 "pand %3, %%mm6\n\t" | |
1091 "pand %3, %%mm7\n\t" | |
1092 "por %%mm2, %%mm0\n\t" | |
1093 "por %%mm3, %%mm1\n\t" | |
1094 "por %%mm6, %%mm4\n\t" | |
1095 "por %%mm7, %%mm5\n\t" | |
1096 | |
1097 "movq %%mm1, %%mm2\n\t" | |
1098 "movq %%mm4, %%mm3\n\t" | |
1099 "psllq $48, %%mm2\n\t" | |
1100 "psllq $32, %%mm3\n\t" | |
1101 "pand %4, %%mm2\n\t" | |
1102 "pand %5, %%mm3\n\t" | |
1103 "por %%mm2, %%mm0\n\t" | |
1104 "psrlq $16, %%mm1\n\t" | |
1105 "psrlq $32, %%mm4\n\t" | |
1106 "psllq $16, %%mm5\n\t" | |
1107 "por %%mm3, %%mm1\n\t" | |
1108 "pand %6, %%mm5\n\t" | |
1109 "por %%mm5, %%mm4\n\t" | |
1110 | |
1111 MOVNTQ" %%mm0, %0\n\t" | |
1112 MOVNTQ" %%mm1, 8%0\n\t" | |
1113 MOVNTQ" %%mm4, 16%0" | |
1114 | |
1115 :"=m"(*d) | |
1116 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1117 :"memory"); | |
1118 d += 24; | |
1119 s += 8; | |
1120 } | |
1121 __asm __volatile(SFENCE:::"memory"); | |
1122 __asm __volatile(EMMS:::"memory"); | |
1123 #endif | |
1124 while(s < end) | |
1125 { | |
1126 register uint16_t bgr; | |
1127 bgr = *s++; | |
1128 *d++ = (bgr&0x1F)<<3; | |
1129 *d++ = (bgr&0x7E0)>>3; | |
1130 *d++ = (bgr&0xF800)>>8; | |
1131 } | |
1132 } | |
2718 | 1133 |
6492 | 1134 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
1135 { | |
1136 const uint16_t *end; | |
1137 #ifdef HAVE_MMX | |
1138 const uint16_t *mm_end; | |
1139 #endif | |
1140 uint8_t *d = (uint8_t *)dst; | |
1141 const uint16_t *s = (const uint16_t *)src; | |
1142 end = s + src_size/2; | |
1143 #ifdef HAVE_MMX | |
1144 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1145 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
6605 | 1146 mm_end = end - 3; |
6492 | 1147 while(s < mm_end) |
1148 { | |
1149 __asm __volatile( | |
1150 PREFETCH" 32%1\n\t" | |
1151 "movq %1, %%mm0\n\t" | |
1152 "movq %1, %%mm1\n\t" | |
1153 "movq %1, %%mm2\n\t" | |
1154 "pand %2, %%mm0\n\t" | |
1155 "pand %3, %%mm1\n\t" | |
1156 "pand %4, %%mm2\n\t" | |
1157 "psllq $3, %%mm0\n\t" | |
1158 "psrlq $2, %%mm1\n\t" | |
1159 "psrlq $7, %%mm2\n\t" | |
1160 "movq %%mm0, %%mm3\n\t" | |
1161 "movq %%mm1, %%mm4\n\t" | |
1162 "movq %%mm2, %%mm5\n\t" | |
1163 "punpcklwd %%mm7, %%mm0\n\t" | |
1164 "punpcklwd %%mm7, %%mm1\n\t" | |
1165 "punpcklwd %%mm7, %%mm2\n\t" | |
1166 "punpckhwd %%mm7, %%mm3\n\t" | |
1167 "punpckhwd %%mm7, %%mm4\n\t" | |
1168 "punpckhwd %%mm7, %%mm5\n\t" | |
1169 "psllq $8, %%mm1\n\t" | |
1170 "psllq $16, %%mm2\n\t" | |
1171 "por %%mm1, %%mm0\n\t" | |
1172 "por %%mm2, %%mm0\n\t" | |
1173 "psllq $8, %%mm4\n\t" | |
1174 "psllq $16, %%mm5\n\t" | |
1175 "por %%mm4, %%mm3\n\t" | |
1176 "por %%mm5, %%mm3\n\t" | |
1177 MOVNTQ" %%mm0, %0\n\t" | |
1178 MOVNTQ" %%mm3, 8%0\n\t" | |
1179 :"=m"(*d) | |
1180 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |
1181 :"memory"); | |
1182 d += 16; | |
1183 s += 4; | |
1184 } | |
1185 __asm __volatile(SFENCE:::"memory"); | |
1186 __asm __volatile(EMMS:::"memory"); | |
1187 #endif | |
1188 while(s < end) | |
1189 { | |
1190 register uint16_t bgr; | |
1191 bgr = *s++; | |
1192 *d++ = (bgr&0x1F)<<3; | |
1193 *d++ = (bgr&0x3E0)>>2; | |
1194 *d++ = (bgr&0x7C00)>>7; | |
1195 *d++ = 0; | |
2718 | 1196 } |
6492 | 1197 } |
1198 | |
1199 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
1200 { | |
1201 const uint16_t *end; | |
1202 #ifdef HAVE_MMX | |
1203 const uint16_t *mm_end; | |
2741 | 1204 #endif |
6492 | 1205 uint8_t *d = (uint8_t *)dst; |
1206 const uint16_t *s = (uint16_t *)src; | |
1207 end = s + src_size/2; | |
1208 #ifdef HAVE_MMX | |
1209 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1210 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
6605 | 1211 mm_end = end - 3; |
6492 | 1212 while(s < mm_end) |
1213 { | |
1214 __asm __volatile( | |
1215 PREFETCH" 32%1\n\t" | |
1216 "movq %1, %%mm0\n\t" | |
1217 "movq %1, %%mm1\n\t" | |
1218 "movq %1, %%mm2\n\t" | |
1219 "pand %2, %%mm0\n\t" | |
1220 "pand %3, %%mm1\n\t" | |
1221 "pand %4, %%mm2\n\t" | |
1222 "psllq $3, %%mm0\n\t" | |
1223 "psrlq $3, %%mm1\n\t" | |
1224 "psrlq $8, %%mm2\n\t" | |
1225 "movq %%mm0, %%mm3\n\t" | |
1226 "movq %%mm1, %%mm4\n\t" | |
1227 "movq %%mm2, %%mm5\n\t" | |
1228 "punpcklwd %%mm7, %%mm0\n\t" | |
1229 "punpcklwd %%mm7, %%mm1\n\t" | |
1230 "punpcklwd %%mm7, %%mm2\n\t" | |
1231 "punpckhwd %%mm7, %%mm3\n\t" | |
1232 "punpckhwd %%mm7, %%mm4\n\t" | |
1233 "punpckhwd %%mm7, %%mm5\n\t" | |
1234 "psllq $8, %%mm1\n\t" | |
1235 "psllq $16, %%mm2\n\t" | |
1236 "por %%mm1, %%mm0\n\t" | |
1237 "por %%mm2, %%mm0\n\t" | |
1238 "psllq $8, %%mm4\n\t" | |
1239 "psllq $16, %%mm5\n\t" | |
1240 "por %%mm4, %%mm3\n\t" | |
1241 "por %%mm5, %%mm3\n\t" | |
1242 MOVNTQ" %%mm0, %0\n\t" | |
1243 MOVNTQ" %%mm3, 8%0\n\t" | |
1244 :"=m"(*d) | |
1245 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |
1246 :"memory"); | |
1247 d += 16; | |
1248 s += 4; | |
1249 } | |
1250 __asm __volatile(SFENCE:::"memory"); | |
1251 __asm __volatile(EMMS:::"memory"); | |
1252 #endif | |
1253 while(s < end) | |
1254 { | |
1255 register uint16_t bgr; | |
1256 bgr = *s++; | |
1257 *d++ = (bgr&0x1F)<<3; | |
1258 *d++ = (bgr&0x7E0)>>3; | |
1259 *d++ = (bgr&0xF800)>>8; | |
1260 *d++ = 0; | |
1261 } | |
2718 | 1262 } |
2694 | 1263 |
3132 | 1264 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
2755 | 1265 { |
1266 #ifdef HAVE_MMX | |
6492 | 1267 /* TODO: unroll this loop */ |
2755 | 1268 asm volatile ( |
1269 "xorl %%eax, %%eax \n\t" | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1270 ".balign 16 \n\t" |
2755 | 1271 "1: \n\t" |
1272 PREFETCH" 32(%0, %%eax) \n\t" | |
1273 "movq (%0, %%eax), %%mm0 \n\t" | |
1274 "movq %%mm0, %%mm1 \n\t" | |
1275 "movq %%mm0, %%mm2 \n\t" | |
1276 "pslld $16, %%mm0 \n\t" | |
1277 "psrld $16, %%mm1 \n\t" | |
6492 | 1278 "pand "MANGLE(mask32r)", %%mm0 \n\t" |
1279 "pand "MANGLE(mask32g)", %%mm2 \n\t" | |
1280 "pand "MANGLE(mask32b)", %%mm1 \n\t" | |
2755 | 1281 "por %%mm0, %%mm2 \n\t" |
1282 "por %%mm1, %%mm2 \n\t" | |
1283 MOVNTQ" %%mm2, (%1, %%eax) \n\t" | |
6096 | 1284 "addl $8, %%eax \n\t" |
2755 | 1285 "cmpl %2, %%eax \n\t" |
1286 " jb 1b \n\t" | |
6605 | 1287 :: "r" (src), "r"(dst), "r" (src_size-7) |
2755 | 1288 : "%eax" |
1289 ); | |
2766 | 1290 |
1291 __asm __volatile(SFENCE:::"memory"); | |
1292 __asm __volatile(EMMS:::"memory"); | |
2755 | 1293 #else |
6492 | 1294 unsigned i; |
1295 unsigned num_pixels = src_size >> 2; | |
2755 | 1296 for(i=0; i<num_pixels; i++) |
1297 { | |
1298 dst[4*i + 0] = src[4*i + 2]; | |
1299 dst[4*i + 1] = src[4*i + 1]; | |
1300 dst[4*i + 2] = src[4*i + 0]; | |
1301 } | |
1302 #endif | |
1303 } | |
1304 | |
5582 | 1305 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
1306 { | |
6492 | 1307 unsigned i; |
5582 | 1308 #ifdef HAVE_MMX |
1309 int mmx_size= 23 - src_size; | |
1310 asm volatile ( | |
1311 "movq "MANGLE(mask24r)", %%mm5 \n\t" | |
1312 "movq "MANGLE(mask24g)", %%mm6 \n\t" | |
1313 "movq "MANGLE(mask24b)", %%mm7 \n\t" | |
1314 ".balign 16 \n\t" | |
1315 "1: \n\t" | |
1316 PREFETCH" 32(%1, %%eax) \n\t" | |
1317 "movq (%1, %%eax), %%mm0 \n\t" // BGR BGR BG | |
1318 "movq (%1, %%eax), %%mm1 \n\t" // BGR BGR BG | |
1319 "movq 2(%1, %%eax), %%mm2 \n\t" // R BGR BGR B | |
1320 "psllq $16, %%mm0 \n\t" // 00 BGR BGR | |
1321 "pand %%mm5, %%mm0 \n\t" | |
1322 "pand %%mm6, %%mm1 \n\t" | |
1323 "pand %%mm7, %%mm2 \n\t" | |
1324 "por %%mm0, %%mm1 \n\t" | |
1325 "por %%mm2, %%mm1 \n\t" | |
1326 "movq 6(%1, %%eax), %%mm0 \n\t" // BGR BGR BG | |
1327 MOVNTQ" %%mm1, (%2, %%eax) \n\t" // RGB RGB RG | |
1328 "movq 8(%1, %%eax), %%mm1 \n\t" // R BGR BGR B | |
1329 "movq 10(%1, %%eax), %%mm2 \n\t" // GR BGR BGR | |
1330 "pand %%mm7, %%mm0 \n\t" | |
1331 "pand %%mm5, %%mm1 \n\t" | |
1332 "pand %%mm6, %%mm2 \n\t" | |
1333 "por %%mm0, %%mm1 \n\t" | |
1334 "por %%mm2, %%mm1 \n\t" | |
1335 "movq 14(%1, %%eax), %%mm0 \n\t" // R BGR BGR B | |
1336 MOVNTQ" %%mm1, 8(%2, %%eax) \n\t" // B RGB RGB R | |
1337 "movq 16(%1, %%eax), %%mm1 \n\t" // GR BGR BGR | |
1338 "movq 18(%1, %%eax), %%mm2 \n\t" // BGR BGR BG | |
1339 "pand %%mm6, %%mm0 \n\t" | |
1340 "pand %%mm7, %%mm1 \n\t" | |
1341 "pand %%mm5, %%mm2 \n\t" | |
1342 "por %%mm0, %%mm1 \n\t" | |
1343 "por %%mm2, %%mm1 \n\t" | |
1344 MOVNTQ" %%mm1, 16(%2, %%eax) \n\t" | |
1345 "addl $24, %%eax \n\t" | |
1346 " js 1b \n\t" | |
1347 : "+a" (mmx_size) | |
1348 : "r" (src-mmx_size), "r"(dst-mmx_size) | |
1349 ); | |
1350 | |
1351 __asm __volatile(SFENCE:::"memory"); | |
1352 __asm __volatile(EMMS:::"memory"); | |
1353 | |
6096 | 1354 if(mmx_size==23) return; //finihsed, was multiple of 8 |
6492 | 1355 |
5582 | 1356 src+= src_size; |
1357 dst+= src_size; | |
6492 | 1358 src_size= 23-mmx_size; |
5582 | 1359 src-= src_size; |
1360 dst-= src_size; | |
1361 #endif | |
1362 for(i=0; i<src_size; i+=3) | |
1363 { | |
6492 | 1364 register uint8_t x; |
5582 | 1365 x = src[i + 2]; |
1366 dst[i + 1] = src[i + 1]; | |
1367 dst[i + 2] = src[i + 0]; | |
1368 dst[i + 0] = x; | |
1369 } | |
1370 } | |
1371 | |
5588 | 1372 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
2725 | 1373 unsigned int width, unsigned int height, |
5588 | 1374 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) |
2701 | 1375 { |
6492 | 1376 unsigned y; |
1377 const unsigned chromWidth= width>>1; | |
2723 | 1378 for(y=0; y<height; y++) |
1379 { | |
2702 | 1380 #ifdef HAVE_MMX |
2723 | 1381 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
1382 asm volatile( | |
1383 "xorl %%eax, %%eax \n\t" | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1384 ".balign 16 \n\t" |
2723 | 1385 "1: \n\t" |
1386 PREFETCH" 32(%1, %%eax, 2) \n\t" | |
1387 PREFETCH" 32(%2, %%eax) \n\t" | |
1388 PREFETCH" 32(%3, %%eax) \n\t" | |
1389 "movq (%2, %%eax), %%mm0 \n\t" // U(0) | |
1390 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1391 "movq (%3, %%eax), %%mm1 \n\t" // V(0) | |
1392 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1393 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
1394 | |
1395 "movq (%1, %%eax,2), %%mm3 \n\t" // Y(0) | |
1396 "movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8) | |
1397 "movq %%mm3, %%mm4 \n\t" // Y(0) | |
1398 "movq %%mm5, %%mm6 \n\t" // Y(8) | |
1399 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | |
1400 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | |
1401 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | |
1402 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | |
2702 | 1403 |
2723 | 1404 MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t" |
1405 MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t" | |
1406 MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t" | |
1407 MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t" | |
2702 | 1408 |
2723 | 1409 "addl $8, %%eax \n\t" |
1410 "cmpl %4, %%eax \n\t" | |
1411 " jb 1b \n\t" | |
1412 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth) | |
1413 : "%eax" | |
1414 ); | |
2702 | 1415 #else |
6492 | 1416 #if __WORDSIZE >= 64 |
2723 | 1417 int i; |
6492 | 1418 uint64_t *ldst = (uint64_t *) dst; |
1419 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1420 for(i = 0; i < chromWidth; i += 2){ | |
1421 uint64_t k, l; | |
1422 k = yc[0] + (uc[0] << 8) + | |
1423 (yc[1] << 16) + (vc[0] << 24); | |
1424 l = yc[2] + (uc[1] << 8) + | |
1425 (yc[3] << 16) + (vc[1] << 24); | |
1426 *ldst++ = k + (l << 32); | |
1427 yc += 4; | |
1428 uc += 2; | |
1429 vc += 2; | |
2723 | 1430 } |
6492 | 1431 |
1432 #else | |
1433 int i, *idst = (int32_t *) dst; | |
1434 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1435 for(i = 0; i < chromWidth; i++){ | |
1436 *idst++ = yc[0] + (uc[0] << 8) + | |
1437 (yc[1] << 16) + (vc[0] << 24); | |
1438 yc += 2; | |
1439 uc++; | |
1440 vc++; | |
1441 } | |
1442 #endif | |
2723 | 1443 #endif |
5588 | 1444 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) |
2723 | 1445 { |
1446 usrc += chromStride; | |
1447 vsrc += chromStride; | |
1448 } | |
1449 ysrc += lumStride; | |
1450 dst += dstStride; | |
2701 | 1451 } |
2723 | 1452 #ifdef HAVE_MMX |
1453 asm( EMMS" \n\t" | |
1454 SFENCE" \n\t" | |
1455 :::"memory"); | |
2702 | 1456 #endif |
2701 | 1457 } |
1458 | |
2724 | 1459 /** |
1460 * | |
1461 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1462 * problem for anyone then tell me, and ill fix it) | |
1463 */ | |
5588 | 1464 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1465 unsigned int width, unsigned int height, | |
1466 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride) | |
1467 { | |
1468 //FIXME interpolate chroma | |
1469 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); | |
1470 } | |
1471 | |
1472 /** | |
1473 * | |
1474 * width should be a multiple of 16 | |
1475 */ | |
1476 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1477 unsigned int width, unsigned int height, | |
1478 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride) | |
1479 { | |
1480 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); | |
1481 } | |
1482 | |
1483 /** | |
1484 * | |
1485 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1486 * problem for anyone then tell me, and ill fix it) | |
1487 */ | |
3132 | 1488 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
2725 | 1489 unsigned int width, unsigned int height, |
1490 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | |
2701 | 1491 { |
6492 | 1492 unsigned y; |
1493 const unsigned chromWidth= width>>1; | |
2724 | 1494 for(y=0; y<height; y+=2) |
1495 { | |
2704 | 1496 #ifdef HAVE_MMX |
2724 | 1497 asm volatile( |
1498 "xorl %%eax, %%eax \n\t" | |
1499 "pcmpeqw %%mm7, %%mm7 \n\t" | |
1500 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1501 ".balign 16 \n\t" |
2724 | 1502 "1: \n\t" |
1503 PREFETCH" 64(%0, %%eax, 4) \n\t" | |
1504 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1505 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1506 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) | |
1507 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) | |
1508 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) | |
1509 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) | |
1510 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
1511 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
1512 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1513 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
1514 | |
1515 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t" | |
2704 | 1516 |
2724 | 1517 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(8) |
1518 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(12) | |
1519 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) | |
1520 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) | |
1521 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) | |
1522 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) | |
1523 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
1524 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
1525 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
1526 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
2704 | 1527 |
2724 | 1528 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t" |
1529 | |
1530 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) | |
1531 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
1532 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
1533 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
1534 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
1535 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
1536 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
1537 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
2704 | 1538 |
2724 | 1539 MOVNTQ" %%mm0, (%3, %%eax) \n\t" |
1540 MOVNTQ" %%mm2, (%2, %%eax) \n\t" | |
1541 | |
1542 "addl $8, %%eax \n\t" | |
1543 "cmpl %4, %%eax \n\t" | |
1544 " jb 1b \n\t" | |
2725 | 1545 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) |
1546 : "memory", "%eax" | |
1547 ); | |
2704 | 1548 |
2806 | 1549 ydst += lumStride; |
1550 src += srcStride; | |
1551 | |
2725 | 1552 asm volatile( |
1553 "xorl %%eax, %%eax \n\t" | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1554 ".balign 16 \n\t" |
2724 | 1555 "1: \n\t" |
1556 PREFETCH" 64(%0, %%eax, 4) \n\t" | |
1557 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1558 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1559 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) | |
1560 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12) | |
1561 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
1562 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
1563 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
1564 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
1565 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
1566 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
2704 | 1567 |
2724 | 1568 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t" |
1569 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t" | |
1570 | |
1571 "addl $8, %%eax \n\t" | |
2725 | 1572 "cmpl %4, %%eax \n\t" |
2724 | 1573 " jb 1b \n\t" |
2704 | 1574 |
2806 | 1575 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) |
2724 | 1576 : "memory", "%eax" |
1577 ); | |
2704 | 1578 #else |
6492 | 1579 unsigned i; |
2724 | 1580 for(i=0; i<chromWidth; i++) |
1581 { | |
1582 ydst[2*i+0] = src[4*i+0]; | |
1583 udst[i] = src[4*i+1]; | |
1584 ydst[2*i+1] = src[4*i+2]; | |
1585 vdst[i] = src[4*i+3]; | |
1586 } | |
1587 ydst += lumStride; | |
1588 src += srcStride; | |
1589 | |
1590 for(i=0; i<chromWidth; i++) | |
1591 { | |
1592 ydst[2*i+0] = src[4*i+0]; | |
1593 ydst[2*i+1] = src[4*i+2]; | |
1594 } | |
1595 #endif | |
1596 udst += chromStride; | |
1597 vdst += chromStride; | |
1598 ydst += lumStride; | |
1599 src += srcStride; | |
2701 | 1600 } |
2724 | 1601 #ifdef HAVE_MMX |
2847 | 1602 asm volatile( EMMS" \n\t" |
1603 SFENCE" \n\t" | |
1604 :::"memory"); | |
2704 | 1605 #endif |
2723 | 1606 } |
2801 | 1607 |
6484
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1608 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1609 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1610 unsigned int width, unsigned int height, unsigned int lumStride, unsigned int chromStride) |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1611 { |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1612 /* Y Plane */ |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1613 memcpy(ydst, ysrc, width*height); |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1614 |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1615 /* XXX: implement upscaling for U,V */ |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1616 } |
c5cf988c6d6f
pre-yvu9toyv12 converter, only grayscale Y-plane coping :)
alex
parents:
6096
diff
changeset
|
1617 |
6582
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1618 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1619 { |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1620 int x,y; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1621 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1622 // first line |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1623 for(x=0; x<srcWidth; x++){ |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1624 dst[2*x+0]= |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1625 dst[2*x+1]= src[x]; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1626 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1627 dst+= dstStride; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1628 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1629 for(y=1; y<srcHeight; y++){ |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1630 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1631 const int mmxSize= srcWidth; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1632 asm volatile( |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1633 "movl %4, %%eax \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1634 "1: \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1635 "movq (%0, %%eax), %%mm0 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1636 "movq (%1, %%eax), %%mm1 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1637 "movq 1(%0, %%eax), %%mm2 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1638 "movq 1(%1, %%eax), %%mm3 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1639 "movq %%mm0, %%mm4 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1640 "movq %%mm1, %%mm5 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1641 PAVGB" %%mm3, %%mm0 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1642 PAVGB" %%mm3, %%mm0 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1643 PAVGB" %%mm4, %%mm3 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1644 PAVGB" %%mm4, %%mm3 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1645 PAVGB" %%mm2, %%mm1 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1646 PAVGB" %%mm2, %%mm1 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1647 PAVGB" %%mm5, %%mm2 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1648 PAVGB" %%mm5, %%mm2 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1649 "movq %%mm3, %%mm4 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1650 "movq %%mm2, %%mm5 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1651 "punpcklbw %%mm1, %%mm3 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1652 "punpckhbw %%mm1, %%mm4 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1653 "punpcklbw %%mm0, %%mm2 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1654 "punpckhbw %%mm0, %%mm5 \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1655 #if 1 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1656 MOVNTQ" %%mm3, (%2, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1657 MOVNTQ" %%mm4, 8(%2, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1658 MOVNTQ" %%mm2, (%3, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1659 MOVNTQ" %%mm5, 8(%3, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1660 #else |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1661 "movq %%mm3, (%2, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1662 "movq %%mm4, 8(%2, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1663 "movq %%mm2, (%3, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1664 "movq %%mm5, 8(%3, %%eax, 2) \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1665 #endif |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1666 "addl $8, %%eax \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1667 " js 1b \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1668 :: "r" (src + mmxSize-1), "r" (src + srcStride + mmxSize-1), |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1669 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1670 "g" (-mmxSize) |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1671 : "%eax" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1672 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1673 ); |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1674 dst[0]= |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1675 dst[dstStride]= src[0]; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1676 #else |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1677 dst[0]= |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1678 dst[dstStride]= src[0]; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1679 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1680 for(x=0; x<srcWidth-1; x++){ |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1681 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1682 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1683 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1684 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1685 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1686 #endif |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1687 dst[srcWidth*2 -1]= |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1688 dst[srcWidth*2 -1 + dstStride]= src[srcWidth-1]; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1689 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1690 dst+=dstStride*2; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1691 src+=srcStride; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1692 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1693 src-=srcStride; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1694 |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1695 // last line |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1696 for(x=0; x<srcWidth; x++){ |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1697 dst[2*x+0]= |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1698 dst[2*x+1]= src[x]; |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1699 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1700 #ifdef HAVE_MMX |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1701 asm volatile( EMMS" \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1702 SFENCE" \n\t" |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1703 :::"memory"); |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1704 #endif |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1705 } |
f98313dcd428
yvu9 -> yv12 unscaled converter with linear chroma scaling
michael
parents:
6492
diff
changeset
|
1706 |
2801 | 1707 /** |
1708 * | |
1709 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1710 * problem for anyone then tell me, and ill fix it) | |
3132 | 1711 * chrominance data is only taken from every secound line others are ignored FIXME write HQ version |
2801 | 1712 */ |
3132 | 1713 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
2801 | 1714 unsigned int width, unsigned int height, |
1715 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | |
1716 { | |
6492 | 1717 unsigned y; |
1718 const unsigned chromWidth= width>>1; | |
2801 | 1719 for(y=0; y<height; y+=2) |
1720 { | |
2847 | 1721 #ifdef HAVE_MMX |
1722 asm volatile( | |
1723 "xorl %%eax, %%eax \n\t" | |
1724 "pcmpeqw %%mm7, %%mm7 \n\t" | |
1725 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
1726 ".balign 16 \n\t" | |
1727 "1: \n\t" | |
1728 PREFETCH" 64(%0, %%eax, 4) \n\t" | |
1729 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) | |
1730 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) | |
1731 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) | |
1732 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) | |
1733 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) | |
1734 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) | |
1735 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
1736 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
1737 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1738 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
1739 | |
1740 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t" | |
1741 | |
1742 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8) | |
1743 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12) | |
1744 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) | |
1745 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) | |
1746 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) | |
1747 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) | |
1748 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
1749 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
1750 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
1751 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
1752 | |
1753 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t" | |
1754 | |
1755 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) | |
1756 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
1757 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
1758 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
1759 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
1760 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
1761 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
1762 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
1763 | |
1764 MOVNTQ" %%mm0, (%3, %%eax) \n\t" | |
1765 MOVNTQ" %%mm2, (%2, %%eax) \n\t" | |
1766 | |
1767 "addl $8, %%eax \n\t" | |
1768 "cmpl %4, %%eax \n\t" | |
1769 " jb 1b \n\t" | |
1770 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) | |
1771 : "memory", "%eax" | |
1772 ); | |
1773 | |
1774 ydst += lumStride; | |
1775 src += srcStride; | |
1776 | |
1777 asm volatile( | |
1778 "xorl %%eax, %%eax \n\t" | |
1779 ".balign 16 \n\t" | |
1780 "1: \n\t" | |
1781 PREFETCH" 64(%0, %%eax, 4) \n\t" | |
1782 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1783 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1784 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) | |
1785 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12) | |
1786 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
1787 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
1788 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
1789 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
1790 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
1791 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
1792 | |
1793 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t" | |
1794 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t" | |
1795 | |
1796 "addl $8, %%eax \n\t" | |
1797 "cmpl %4, %%eax \n\t" | |
1798 " jb 1b \n\t" | |
1799 | |
1800 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) | |
1801 : "memory", "%eax" | |
1802 ); | |
1803 #else | |
6492 | 1804 unsigned i; |
2801 | 1805 for(i=0; i<chromWidth; i++) |
1806 { | |
1807 udst[i] = src[4*i+0]; | |
1808 ydst[2*i+0] = src[4*i+1]; | |
1809 vdst[i] = src[4*i+2]; | |
1810 ydst[2*i+1] = src[4*i+3]; | |
1811 } | |
1812 ydst += lumStride; | |
1813 src += srcStride; | |
1814 | |
1815 for(i=0; i<chromWidth; i++) | |
1816 { | |
1817 ydst[2*i+0] = src[4*i+1]; | |
1818 ydst[2*i+1] = src[4*i+3]; | |
1819 } | |
2847 | 1820 #endif |
2801 | 1821 udst += chromStride; |
1822 vdst += chromStride; | |
1823 ydst += lumStride; | |
1824 src += srcStride; | |
1825 } | |
2847 | 1826 #ifdef HAVE_MMX |
1827 asm volatile( EMMS" \n\t" | |
1828 SFENCE" \n\t" | |
1829 :::"memory"); | |
1830 #endif | |
2801 | 1831 } |
1832 | |
3132 | 1833 /** |
1834 * | |
1835 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a | |
1836 * problem for anyone then tell me, and ill fix it) | |
4622 | 1837 * chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version |
3132 | 1838 */ |
1839 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
1840 unsigned int width, unsigned int height, | |
1841 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | |
1842 { | |
6492 | 1843 unsigned y; |
1844 const unsigned chromWidth= width>>1; | |
4622 | 1845 #ifdef HAVE_MMX |
1846 for(y=0; y<height-2; y+=2) | |
1847 { | |
6492 | 1848 unsigned i; |
4622 | 1849 for(i=0; i<2; i++) |
1850 { | |
1851 asm volatile( | |
1852 "movl %2, %%eax \n\t" | |
4923 | 1853 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1854 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
4622 | 1855 "pxor %%mm7, %%mm7 \n\t" |
1856 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1857 ".balign 16 \n\t" | |
1858 "1: \n\t" | |
1859 PREFETCH" 64(%0, %%ebx) \n\t" | |
1860 "movd (%0, %%ebx), %%mm0 \n\t" | |
1861 "movd 3(%0, %%ebx), %%mm1 \n\t" | |
1862 "punpcklbw %%mm7, %%mm0 \n\t" | |
1863 "punpcklbw %%mm7, %%mm1 \n\t" | |
1864 "movd 6(%0, %%ebx), %%mm2 \n\t" | |
1865 "movd 9(%0, %%ebx), %%mm3 \n\t" | |
1866 "punpcklbw %%mm7, %%mm2 \n\t" | |
1867 "punpcklbw %%mm7, %%mm3 \n\t" | |
1868 "pmaddwd %%mm6, %%mm0 \n\t" | |
1869 "pmaddwd %%mm6, %%mm1 \n\t" | |
1870 "pmaddwd %%mm6, %%mm2 \n\t" | |
1871 "pmaddwd %%mm6, %%mm3 \n\t" | |
1872 #ifndef FAST_BGR2YV12 | |
1873 "psrad $8, %%mm0 \n\t" | |
1874 "psrad $8, %%mm1 \n\t" | |
1875 "psrad $8, %%mm2 \n\t" | |
1876 "psrad $8, %%mm3 \n\t" | |
1877 #endif | |
1878 "packssdw %%mm1, %%mm0 \n\t" | |
1879 "packssdw %%mm3, %%mm2 \n\t" | |
1880 "pmaddwd %%mm5, %%mm0 \n\t" | |
1881 "pmaddwd %%mm5, %%mm2 \n\t" | |
1882 "packssdw %%mm2, %%mm0 \n\t" | |
1883 "psraw $7, %%mm0 \n\t" | |
1884 | |
1885 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1886 "movd 15(%0, %%ebx), %%mm1 \n\t" | |
1887 "punpcklbw %%mm7, %%mm4 \n\t" | |
1888 "punpcklbw %%mm7, %%mm1 \n\t" | |
1889 "movd 18(%0, %%ebx), %%mm2 \n\t" | |
1890 "movd 21(%0, %%ebx), %%mm3 \n\t" | |
1891 "punpcklbw %%mm7, %%mm2 \n\t" | |
1892 "punpcklbw %%mm7, %%mm3 \n\t" | |
1893 "pmaddwd %%mm6, %%mm4 \n\t" | |
1894 "pmaddwd %%mm6, %%mm1 \n\t" | |
1895 "pmaddwd %%mm6, %%mm2 \n\t" | |
1896 "pmaddwd %%mm6, %%mm3 \n\t" | |
1897 #ifndef FAST_BGR2YV12 | |
1898 "psrad $8, %%mm4 \n\t" | |
1899 "psrad $8, %%mm1 \n\t" | |
1900 "psrad $8, %%mm2 \n\t" | |
1901 "psrad $8, %%mm3 \n\t" | |
1902 #endif | |
1903 "packssdw %%mm1, %%mm4 \n\t" | |
1904 "packssdw %%mm3, %%mm2 \n\t" | |
1905 "pmaddwd %%mm5, %%mm4 \n\t" | |
1906 "pmaddwd %%mm5, %%mm2 \n\t" | |
1907 "addl $24, %%ebx \n\t" | |
1908 "packssdw %%mm2, %%mm4 \n\t" | |
1909 "psraw $7, %%mm4 \n\t" | |
1910 | |
1911 "packuswb %%mm4, %%mm0 \n\t" | |
4923 | 1912 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
4622 | 1913 |
1914 MOVNTQ" %%mm0, (%1, %%eax) \n\t" | |
1915 "addl $8, %%eax \n\t" | |
1916 " js 1b \n\t" | |
1917 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) | |
1918 : "%eax", "%ebx" | |
1919 ); | |
1920 ydst += lumStride; | |
1921 src += srcStride; | |
1922 } | |
1923 src -= srcStride*2; | |
1924 asm volatile( | |
1925 "movl %4, %%eax \n\t" | |
4923 | 1926 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1927 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
4622 | 1928 "pxor %%mm7, %%mm7 \n\t" |
1929 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1930 "addl %%ebx, %%ebx \n\t" | |
1931 ".balign 16 \n\t" | |
1932 "1: \n\t" | |
1933 PREFETCH" 64(%0, %%ebx) \n\t" | |
1934 PREFETCH" 64(%1, %%ebx) \n\t" | |
1935 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1936 "movq (%0, %%ebx), %%mm0 \n\t" | |
1937 "movq (%1, %%ebx), %%mm1 \n\t" | |
1938 "movq 6(%0, %%ebx), %%mm2 \n\t" | |
1939 "movq 6(%1, %%ebx), %%mm3 \n\t" | |
1940 PAVGB" %%mm1, %%mm0 \n\t" | |
1941 PAVGB" %%mm3, %%mm2 \n\t" | |
1942 "movq %%mm0, %%mm1 \n\t" | |
1943 "movq %%mm2, %%mm3 \n\t" | |
1944 "psrlq $24, %%mm0 \n\t" | |
1945 "psrlq $24, %%mm2 \n\t" | |
1946 PAVGB" %%mm1, %%mm0 \n\t" | |
1947 PAVGB" %%mm3, %%mm2 \n\t" | |
1948 "punpcklbw %%mm7, %%mm0 \n\t" | |
1949 "punpcklbw %%mm7, %%mm2 \n\t" | |
1950 #else | |
1951 "movd (%0, %%ebx), %%mm0 \n\t" | |
1952 "movd (%1, %%ebx), %%mm1 \n\t" | |
1953 "movd 3(%0, %%ebx), %%mm2 \n\t" | |
1954 "movd 3(%1, %%ebx), %%mm3 \n\t" | |
1955 "punpcklbw %%mm7, %%mm0 \n\t" | |
1956 "punpcklbw %%mm7, %%mm1 \n\t" | |
1957 "punpcklbw %%mm7, %%mm2 \n\t" | |
1958 "punpcklbw %%mm7, %%mm3 \n\t" | |
1959 "paddw %%mm1, %%mm0 \n\t" | |
1960 "paddw %%mm3, %%mm2 \n\t" | |
1961 "paddw %%mm2, %%mm0 \n\t" | |
1962 "movd 6(%0, %%ebx), %%mm4 \n\t" | |
1963 "movd 6(%1, %%ebx), %%mm1 \n\t" | |
1964 "movd 9(%0, %%ebx), %%mm2 \n\t" | |
1965 "movd 9(%1, %%ebx), %%mm3 \n\t" | |
1966 "punpcklbw %%mm7, %%mm4 \n\t" | |
1967 "punpcklbw %%mm7, %%mm1 \n\t" | |
1968 "punpcklbw %%mm7, %%mm2 \n\t" | |
1969 "punpcklbw %%mm7, %%mm3 \n\t" | |
1970 "paddw %%mm1, %%mm4 \n\t" | |
1971 "paddw %%mm3, %%mm2 \n\t" | |
1972 "paddw %%mm4, %%mm2 \n\t" | |
1973 "psrlw $2, %%mm0 \n\t" | |
1974 "psrlw $2, %%mm2 \n\t" | |
1975 #endif | |
4923 | 1976 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1977 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4622 | 1978 |
1979 "pmaddwd %%mm0, %%mm1 \n\t" | |
1980 "pmaddwd %%mm2, %%mm3 \n\t" | |
1981 "pmaddwd %%mm6, %%mm0 \n\t" | |
1982 "pmaddwd %%mm6, %%mm2 \n\t" | |
1983 #ifndef FAST_BGR2YV12 | |
1984 "psrad $8, %%mm0 \n\t" | |
1985 "psrad $8, %%mm1 \n\t" | |
1986 "psrad $8, %%mm2 \n\t" | |
1987 "psrad $8, %%mm3 \n\t" | |
1988 #endif | |
1989 "packssdw %%mm2, %%mm0 \n\t" | |
1990 "packssdw %%mm3, %%mm1 \n\t" | |
1991 "pmaddwd %%mm5, %%mm0 \n\t" | |
1992 "pmaddwd %%mm5, %%mm1 \n\t" | |
1993 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
1994 "psraw $7, %%mm0 \n\t" | |
1995 | |
1996 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1997 "movq 12(%0, %%ebx), %%mm4 \n\t" | |
1998 "movq 12(%1, %%ebx), %%mm1 \n\t" | |
1999 "movq 18(%0, %%ebx), %%mm2 \n\t" | |
2000 "movq 18(%1, %%ebx), %%mm3 \n\t" | |
2001 PAVGB" %%mm1, %%mm4 \n\t" | |
2002 PAVGB" %%mm3, %%mm2 \n\t" | |
2003 "movq %%mm4, %%mm1 \n\t" | |
2004 "movq %%mm2, %%mm3 \n\t" | |
2005 "psrlq $24, %%mm4 \n\t" | |
2006 "psrlq $24, %%mm2 \n\t" | |
2007 PAVGB" %%mm1, %%mm4 \n\t" | |
2008 PAVGB" %%mm3, %%mm2 \n\t" | |
2009 "punpcklbw %%mm7, %%mm4 \n\t" | |
2010 "punpcklbw %%mm7, %%mm2 \n\t" | |
2011 #else | |
2012 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
2013 "movd 12(%1, %%ebx), %%mm1 \n\t" | |
2014 "movd 15(%0, %%ebx), %%mm2 \n\t" | |
2015 "movd 15(%1, %%ebx), %%mm3 \n\t" | |
2016 "punpcklbw %%mm7, %%mm4 \n\t" | |
2017 "punpcklbw %%mm7, %%mm1 \n\t" | |
2018 "punpcklbw %%mm7, %%mm2 \n\t" | |
2019 "punpcklbw %%mm7, %%mm3 \n\t" | |
2020 "paddw %%mm1, %%mm4 \n\t" | |
2021 "paddw %%mm3, %%mm2 \n\t" | |
2022 "paddw %%mm2, %%mm4 \n\t" | |
2023 "movd 18(%0, %%ebx), %%mm5 \n\t" | |
2024 "movd 18(%1, %%ebx), %%mm1 \n\t" | |
2025 "movd 21(%0, %%ebx), %%mm2 \n\t" | |
2026 "movd 21(%1, %%ebx), %%mm3 \n\t" | |
2027 "punpcklbw %%mm7, %%mm5 \n\t" | |
2028 "punpcklbw %%mm7, %%mm1 \n\t" | |
2029 "punpcklbw %%mm7, %%mm2 \n\t" | |
2030 "punpcklbw %%mm7, %%mm3 \n\t" | |
2031 "paddw %%mm1, %%mm5 \n\t" | |
2032 "paddw %%mm3, %%mm2 \n\t" | |
2033 "paddw %%mm5, %%mm2 \n\t" | |
4923 | 2034 "movq "MANGLE(w1111)", %%mm5 \n\t" |
4622 | 2035 "psrlw $2, %%mm4 \n\t" |
2036 "psrlw $2, %%mm2 \n\t" | |
2037 #endif | |
4923 | 2038 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
2039 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4622 | 2040 |
2041 "pmaddwd %%mm4, %%mm1 \n\t" | |
2042 "pmaddwd %%mm2, %%mm3 \n\t" | |
2043 "pmaddwd %%mm6, %%mm4 \n\t" | |
2044 "pmaddwd %%mm6, %%mm2 \n\t" | |
2045 #ifndef FAST_BGR2YV12 | |
2046 "psrad $8, %%mm4 \n\t" | |
2047 "psrad $8, %%mm1 \n\t" | |
2048 "psrad $8, %%mm2 \n\t" | |
2049 "psrad $8, %%mm3 \n\t" | |
2050 #endif | |
2051 "packssdw %%mm2, %%mm4 \n\t" | |
2052 "packssdw %%mm3, %%mm1 \n\t" | |
2053 "pmaddwd %%mm5, %%mm4 \n\t" | |
2054 "pmaddwd %%mm5, %%mm1 \n\t" | |
2055 "addl $24, %%ebx \n\t" | |
2056 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
2057 "psraw $7, %%mm4 \n\t" | |
2058 | |
2059 "movq %%mm0, %%mm1 \n\t" | |
2060 "punpckldq %%mm4, %%mm0 \n\t" | |
2061 "punpckhdq %%mm4, %%mm1 \n\t" | |
2062 "packsswb %%mm1, %%mm0 \n\t" | |
4923 | 2063 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
4622 | 2064 |
2065 "movd %%mm0, (%2, %%eax) \n\t" | |
2066 "punpckhdq %%mm0, %%mm0 \n\t" | |
2067 "movd %%mm0, (%3, %%eax) \n\t" | |
2068 "addl $4, %%eax \n\t" | |
2069 " js 1b \n\t" | |
2070 : : "r" (src+width*6), "r" (src+srcStride+width*6), "r" (udst+width), "r" (vdst+width), "g" (-width) | |
2071 : "%eax", "%ebx" | |
2072 ); | |
2073 | |
2074 udst += chromStride; | |
2075 vdst += chromStride; | |
2076 src += srcStride*2; | |
2077 } | |
2078 | |
2079 asm volatile( EMMS" \n\t" | |
2080 SFENCE" \n\t" | |
2081 :::"memory"); | |
2082 #else | |
2083 y=0; | |
2084 #endif | |
2085 for(; y<height; y+=2) | |
3132 | 2086 { |
6492 | 2087 unsigned i; |
3132 | 2088 for(i=0; i<chromWidth; i++) |
2089 { | |
2090 unsigned int b= src[6*i+0]; | |
2091 unsigned int g= src[6*i+1]; | |
2092 unsigned int r= src[6*i+2]; | |
2801 | 2093 |
3633 | 2094 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2095 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; | |
2096 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; | |
3132 | 2097 |
2098 udst[i] = U; | |
2099 vdst[i] = V; | |
2100 ydst[2*i] = Y; | |
2101 | |
2102 b= src[6*i+3]; | |
2103 g= src[6*i+4]; | |
2104 r= src[6*i+5]; | |
2105 | |
3633 | 2106 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
3132 | 2107 ydst[2*i+1] = Y; |
2108 } | |
2109 ydst += lumStride; | |
2110 src += srcStride; | |
2111 | |
2112 for(i=0; i<chromWidth; i++) | |
2113 { | |
2114 unsigned int b= src[6*i+0]; | |
2115 unsigned int g= src[6*i+1]; | |
2116 unsigned int r= src[6*i+2]; | |
2117 | |
3633 | 2118 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
3132 | 2119 |
2120 ydst[2*i] = Y; | |
2121 | |
2122 b= src[6*i+3]; | |
2123 g= src[6*i+4]; | |
2124 r= src[6*i+5]; | |
2125 | |
3633 | 2126 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
3132 | 2127 ydst[2*i+1] = Y; |
2128 } | |
2129 udst += chromStride; | |
2130 vdst += chromStride; | |
2131 ydst += lumStride; | |
2132 src += srcStride; | |
2133 } | |
2134 } | |
5337 | 2135 |
2136 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | |
6492 | 2137 unsigned width, unsigned height, unsigned src1Stride, |
2138 unsigned src2Stride, unsigned dstStride){ | |
2139 unsigned h; | |
5337 | 2140 |
2141 for(h=0; h < height; h++) | |
2142 { | |
6492 | 2143 unsigned w; |
5337 | 2144 |
2145 #ifdef HAVE_MMX | |
2146 #ifdef HAVE_SSE2 | |
2147 asm( | |
2148 "xorl %%eax, %%eax \n\t" | |
2149 "1: \n\t" | |
2150 PREFETCH" 64(%1, %%eax) \n\t" | |
2151 PREFETCH" 64(%2, %%eax) \n\t" | |
2152 "movdqa (%1, %%eax), %%xmm0 \n\t" | |
2153 "movdqa (%1, %%eax), %%xmm1 \n\t" | |
2154 "movdqa (%2, %%eax), %%xmm2 \n\t" | |
2155 "punpcklbw %%xmm2, %%xmm0 \n\t" | |
2156 "punpckhbw %%xmm2, %%xmm1 \n\t" | |
2157 "movntdq %%xmm0, (%0, %%eax, 2) \n\t" | |
2158 "movntdq %%xmm1, 16(%0, %%eax, 2)\n\t" | |
2159 "addl $16, %%eax \n\t" | |
2160 "cmpl %3, %%eax \n\t" | |
2161 " jb 1b \n\t" | |
2162 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |
2163 : "memory", "%eax" | |
2164 ); | |
2165 #else | |
2166 asm( | |
2167 "xorl %%eax, %%eax \n\t" | |
2168 "1: \n\t" | |
2169 PREFETCH" 64(%1, %%eax) \n\t" | |
2170 PREFETCH" 64(%2, %%eax) \n\t" | |
2171 "movq (%1, %%eax), %%mm0 \n\t" | |
2172 "movq 8(%1, %%eax), %%mm2 \n\t" | |
2173 "movq %%mm0, %%mm1 \n\t" | |
2174 "movq %%mm2, %%mm3 \n\t" | |
2175 "movq (%2, %%eax), %%mm4 \n\t" | |
2176 "movq 8(%2, %%eax), %%mm5 \n\t" | |
2177 "punpcklbw %%mm4, %%mm0 \n\t" | |
2178 "punpckhbw %%mm4, %%mm1 \n\t" | |
2179 "punpcklbw %%mm5, %%mm2 \n\t" | |
2180 "punpckhbw %%mm5, %%mm3 \n\t" | |
2181 MOVNTQ" %%mm0, (%0, %%eax, 2) \n\t" | |
2182 MOVNTQ" %%mm1, 8(%0, %%eax, 2) \n\t" | |
2183 MOVNTQ" %%mm2, 16(%0, %%eax, 2) \n\t" | |
2184 MOVNTQ" %%mm3, 24(%0, %%eax, 2) \n\t" | |
2185 "addl $16, %%eax \n\t" | |
2186 "cmpl %3, %%eax \n\t" | |
2187 " jb 1b \n\t" | |
2188 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |
2189 : "memory", "%eax" | |
2190 ); | |
2191 #endif | |
2192 for(w= (width&(~15)); w < width; w++) | |
2193 { | |
2194 dest[2*w+0] = src1[w]; | |
2195 dest[2*w+1] = src2[w]; | |
2196 } | |
2197 #else | |
2198 for(w=0; w < width; w++) | |
2199 { | |
2200 dest[2*w+0] = src1[w]; | |
2201 dest[2*w+1] = src2[w]; | |
2202 } | |
2203 #endif | |
2204 dest += dstStride; | |
2205 src1 += src1Stride; | |
2206 src2 += src2Stride; | |
2207 } | |
2208 #ifdef HAVE_MMX | |
2209 asm( | |
2210 EMMS" \n\t" | |
2211 SFENCE" \n\t" | |
2212 ::: "memory" | |
2213 ); | |
2214 #endif | |
2215 } | |
6606
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2216 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
/**
 * Upscales two independent byte planes by a factor of two in both directions.
 *
 * For each plane, every source byte is written twice in a row, and every
 * source row (selected with y>>1) feeds two consecutive destination rows.
 * Per destination row, width/2 source bytes are consumed and 2*(width/2)
 * destination bytes are produced; height/2 destination rows are written.
 *
 * NOTE(review): judging by the name, src1/src2 are presumably the two chroma
 * planes of a YVU9 picture and dst1/dst2 those of a YV12 picture -- confirm
 * against the callers.
 *
 * @param src1,src2             source planes (read only)
 * @param dst1,dst2             destination planes
 * @param width,height          dimensions; only width/2 and height/2 are used
 * @param srcStride1,srcStride2 byte distance between source rows
 * @param dstStride1,dstStride2 byte distance between destination rows
 */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
			uint8_t *dst1, uint8_t *dst2,
			unsigned width, unsigned height,
			unsigned srcStride1, unsigned srcStride2,
			unsigned dstStride1, unsigned dstStride2)
{
	unsigned y,x,w,h;
	w=width/2; h=height/2;
#ifdef HAVE_MMX
	/* warm the cache with the second row of each source plane */
	asm volatile(
		PREFETCH" %0\n\t"
		PREFETCH" %1\n\t"
		::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif
	/* first plane: src1 -> dst1 */
	for(y=0;y<h;y++){
		const uint8_t* s1=src1+srcStride1*(y>>1);
		uint8_t* d=dst1+dstStride1*y;
		x=0;
#ifdef HAVE_MMX
		/*
		 * Each iteration loads 32 source bytes and stores 64 destination
		 * bytes, so it may only run while a complete 32-byte chunk is left
		 * in the row.  (The former "x<w" bound let the last iteration read
		 * and write past the end of the row whenever w was not a multiple
		 * of 32.)  The scalar loop below finishes the remainder.
		 */
		for(;x+32<=w;x+=32)
		{
			asm volatile(
				PREFETCH" 32%1\n\t"
				"movq %1, %%mm0\n\t"
				"movq 8%1, %%mm2\n\t"
				"movq 16%1, %%mm4\n\t"
				"movq 24%1, %%mm6\n\t"
				"movq %%mm0, %%mm1\n\t"
				"movq %%mm2, %%mm3\n\t"
				"movq %%mm4, %%mm5\n\t"
				"movq %%mm6, %%mm7\n\t"
				/* unpacking a register with itself duplicates every byte */
				"punpcklbw %%mm0, %%mm0\n\t"
				"punpckhbw %%mm1, %%mm1\n\t"
				"punpcklbw %%mm2, %%mm2\n\t"
				"punpckhbw %%mm3, %%mm3\n\t"
				"punpcklbw %%mm4, %%mm4\n\t"
				"punpckhbw %%mm5, %%mm5\n\t"
				"punpcklbw %%mm6, %%mm6\n\t"
				"punpckhbw %%mm7, %%mm7\n\t"
				MOVNTQ" %%mm0, %0\n\t"
				MOVNTQ" %%mm1, 8%0\n\t"
				MOVNTQ" %%mm2, 16%0\n\t"
				MOVNTQ" %%mm3, 24%0\n\t"
				MOVNTQ" %%mm4, 32%0\n\t"
				MOVNTQ" %%mm5, 40%0\n\t"
				MOVNTQ" %%mm6, 48%0\n\t"
				MOVNTQ" %%mm7, 56%0"
				:"=m"(d[2*x])
				:"m"(s1[x])
				:"memory");
		}
#endif
		/* scalar path: whole row without MMX, otherwise the tail */
		for(;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
	}
	/* second plane: src2 -> dst2, same scheme */
	for(y=0;y<h;y++){
		const uint8_t* s2=src2+srcStride2*(y>>1);
		uint8_t* d=dst2+dstStride2*y;
		x=0;
#ifdef HAVE_MMX
		/* only complete 32-byte chunks, see the first plane above */
		for(;x+32<=w;x+=32)
		{
			asm volatile(
				PREFETCH" 32%1\n\t"
				"movq %1, %%mm0\n\t"
				"movq 8%1, %%mm2\n\t"
				"movq 16%1, %%mm4\n\t"
				"movq 24%1, %%mm6\n\t"
				"movq %%mm0, %%mm1\n\t"
				"movq %%mm2, %%mm3\n\t"
				"movq %%mm4, %%mm5\n\t"
				"movq %%mm6, %%mm7\n\t"
				"punpcklbw %%mm0, %%mm0\n\t"
				"punpckhbw %%mm1, %%mm1\n\t"
				"punpcklbw %%mm2, %%mm2\n\t"
				"punpckhbw %%mm3, %%mm3\n\t"
				"punpcklbw %%mm4, %%mm4\n\t"
				"punpckhbw %%mm5, %%mm5\n\t"
				"punpcklbw %%mm6, %%mm6\n\t"
				"punpckhbw %%mm7, %%mm7\n\t"
				MOVNTQ" %%mm0, %0\n\t"
				MOVNTQ" %%mm1, 8%0\n\t"
				MOVNTQ" %%mm2, 16%0\n\t"
				MOVNTQ" %%mm3, 24%0\n\t"
				MOVNTQ" %%mm4, 32%0\n\t"
				MOVNTQ" %%mm5, 40%0\n\t"
				MOVNTQ" %%mm6, 48%0\n\t"
				MOVNTQ" %%mm7, 56%0"
				:"=m"(d[2*x])
				:"m"(s2[x])
				:"memory");
		}
#endif
		for(;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
	}
#ifdef HAVE_MMX
	/* leave MMX state and order the MOVNTQ stores before returning */
	asm(
		EMMS" \n\t"
		SFENCE" \n\t"
		::: "memory"
		);
#endif
}
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
2321 |
50b5d8367318
merging changes from mplayerxp (rgb2rgb*.{c,h} only)
michael
parents:
6605
diff
changeset
|
/**
 * Interleaves three byte planes into one packed destination buffer.
 *
 * For every index x < width/2 of a row, eight bytes are written to dst:
 *   src1[4x], src2[x], src1[4x+1], src3[x], src1[4x+2], src2[x], src1[4x+3], src3[x]
 * One row is produced per src1 row; src2 and src3 advance one row for every
 * four output rows (y>>2).
 *
 * NOTE(review): judging by the name and the asm comments, src1/src2/src3 are
 * presumably the Y/U/V planes of a YVU9 picture and dst is YUY2 -- confirm.
 * NOTE(review): with w = width/2 each row reads 4*(width/2) bytes from src1
 * and writes 8*(width/2) bytes to dst; verify that this matches what callers
 * allocate.
 *
 * @param src1,src2,src3 source planes (read only)
 * @param dst            packed destination buffer
 * @param width,height   dimensions; width/2 groups per row, height rows
 * @param srcStride1,srcStride2,srcStride3 byte distance between source rows
 * @param dstStride      byte distance between destination rows
 */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
			uint8_t *dst,
			unsigned width, unsigned height,
			unsigned srcStride1, unsigned srcStride2,
			unsigned srcStride3, unsigned dstStride)
{
	unsigned y,x,x2,w,h;
	w=width/2; h=height;
#ifdef HAVE_MMX
	/* warm the cache with the second row of each source plane */
	asm volatile(
		PREFETCH" %0\n\t"
		PREFETCH" %1\n\t"
		PREFETCH" %2\n\t"
		::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)),"m"(*(src3+srcStride3)):"memory");
#endif
	for(y=0;y<h;y++){
		const uint8_t* yp=src1+srcStride1*y;
		const uint8_t* up=src2+srcStride2*(y>>2);
		const uint8_t* vp=src3+srcStride3*(y>>2);
		uint8_t* d=dst+dstStride*y;
		x2=0;	/* index into yp, advances 4 per x */
		x=0;
#ifdef HAVE_MMX
		/*
		 * Each iteration consumes 8 bytes of up/vp and 32 bytes of yp and
		 * stores 64 bytes, so it may only run while 8 complete groups are
		 * left in the row.  (The former "x<w" bound overran all four
		 * buffers whenever w was not a multiple of 8.)  The scalar loop
		 * below finishes the remainder from the current x/x2.
		 */
		for(;x+8<=w;x+=8,x2+=32)
		{
			asm volatile(
				PREFETCH" 32%1\n\t"
				PREFETCH" 32%2\n\t"
				PREFETCH" 32%3\n\t"
				"movq %1, %%mm0\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
				"movq %2, %%mm1\n\t" /* U0U1U2U3U4U5U6U7 */
				"movq %3, %%mm2\n\t" /* V0V1V2V3V4V5V6V7 */
				"movq %%mm0, %%mm3\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
				"movq %%mm1, %%mm4\n\t" /* U0U1U2U3U4U5U6U7 */
				"movq %%mm2, %%mm5\n\t" /* V0V1V2V3V4V5V6V7 */
				"punpcklbw %%mm1, %%mm1\n\t" /* U0U0 U1U1 U2U2 U3U3 */
				"punpcklbw %%mm2, %%mm2\n\t" /* V0V0 V1V1 V2V2 V3V3 */
				"punpckhbw %%mm4, %%mm4\n\t" /* U4U4 U5U5 U6U6 U7U7 */
				"punpckhbw %%mm5, %%mm5\n\t" /* V4V4 V5V5 V6V6 V7V7 */

				"movq %%mm1, %%mm6\n\t"
				"punpcklbw %%mm2, %%mm1\n\t" /* U0V0 U0V0 U1V1 U1V1*/
				"punpcklbw %%mm1, %%mm0\n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
				"punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
				MOVNTQ" %%mm0, %0\n\t"
				MOVNTQ" %%mm3, 8%0\n\t"

				"punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/
				"movq 8%1, %%mm0\n\t"
				"movq %%mm0, %%mm3\n\t"
				"punpcklbw %%mm6, %%mm0\n\t" /* Y U2 Y V2 Y U2 Y V2*/
				"punpckhbw %%mm6, %%mm3\n\t" /* Y U3 Y V3 Y U3 Y V3*/
				MOVNTQ" %%mm0, 16%0\n\t"
				MOVNTQ" %%mm3, 24%0\n\t"

				"movq %%mm4, %%mm6\n\t"
				"movq 16%1, %%mm0\n\t"
				"movq %%mm0, %%mm3\n\t"
				"punpcklbw %%mm5, %%mm4\n\t"
				"punpcklbw %%mm4, %%mm0\n\t" /* Y U4 Y V4 Y U4 Y V4*/
				"punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/
				MOVNTQ" %%mm0, 32%0\n\t"
				MOVNTQ" %%mm3, 40%0\n\t"

				"punpckhbw %%mm5, %%mm6\n\t"
				"movq 24%1, %%mm0\n\t"
				"movq %%mm0, %%mm3\n\t"
				"punpcklbw %%mm6, %%mm0\n\t" /* Y U6 Y V6 Y U6 Y V6*/
				"punpckhbw %%mm6, %%mm3\n\t" /* Y U7 Y V7 Y U7 Y V7*/
				MOVNTQ" %%mm0, 48%0\n\t"
				MOVNTQ" %%mm3, 56%0\n\t"

				:"=m"(d[8*x])
				:"m"(yp[x2]),"m"(up[x]),"m"(vp[x])
				:"memory");
		}
#endif
		/* scalar path: whole row without MMX, otherwise the tail */
		for(;x<w;x++,x2+=4)
		{
			d[8*x+0]=yp[x2];
			d[8*x+1]=up[x];
			d[8*x+2]=yp[x2+1];
			d[8*x+3]=vp[x];
			d[8*x+4]=yp[x2+2];
			d[8*x+5]=up[x];
			d[8*x+6]=yp[x2+3];
			d[8*x+7]=vp[x];
		}
	}
#ifdef HAVE_MMX
	/* leave MMX state and order the MOVNTQ stores before returning */
	asm(
		EMMS" \n\t"
		SFENCE" \n\t"
		::: "memory"
		);
#endif
}