Mercurial > mplayer.hg
comparison libswscale/rgb2rgb_template.c @ 23140:4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
author | diego |
---|---|
date | Sat, 28 Apr 2007 11:44:49 +0000 |
parents | 10425310d2da |
children | f9a8f92087ef |
comparison
equal
deleted
inserted
replaced
23139:10425310d2da | 23140:4d3870361b73 |
---|---|
1 /* | 1 /* |
2 * | |
3 * rgb2rgb.c, Software RGB to RGB convertor | 2 * rgb2rgb.c, Software RGB to RGB convertor |
4 * pluralize by Software PAL8 to RGB convertor | 3 * pluralize by Software PAL8 to RGB convertor |
5 * Software YUV to YUV convertor | 4 * Software YUV to YUV convertor |
6 * Software YUV to RGB convertor | 5 * Software YUV to RGB convertor |
7 * Written by Nick Kurshev. | 6 * Written by Nick Kurshev. |
51 #endif | 50 #endif |
52 | 51 |
53 #ifdef HAVE_3DNOW | 52 #ifdef HAVE_3DNOW |
54 #define PREFETCH "prefetch" | 53 #define PREFETCH "prefetch" |
55 #define PREFETCHW "prefetchw" | 54 #define PREFETCHW "prefetchw" |
56 #define PAVGB "pavgusb" | 55 #define PAVGB "pavgusb" |
57 #elif defined ( HAVE_MMX2 ) | 56 #elif defined ( HAVE_MMX2 ) |
58 #define PREFETCH "prefetchnta" | 57 #define PREFETCH "prefetchnta" |
59 #define PREFETCHW "prefetcht0" | 58 #define PREFETCHW "prefetcht0" |
60 #define PAVGB "pavgb" | 59 #define PAVGB "pavgb" |
61 #else | 60 #else |
62 #ifdef __APPLE__ | 61 #ifdef __APPLE__ |
63 #define PREFETCH "#" | 62 #define PREFETCH "#" |
64 #define PREFETCHW "#" | 63 #define PREFETCHW "#" |
65 #else | 64 #else |
83 #define SFENCE " # nop" | 82 #define SFENCE " # nop" |
84 #endif | 83 #endif |
85 | 84 |
86 static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size) | 85 static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size) |
87 { | 86 { |
88 uint8_t *dest = dst; | 87 uint8_t *dest = dst; |
89 const uint8_t *s = src; | 88 const uint8_t *s = src; |
90 const uint8_t *end; | 89 const uint8_t *end; |
91 #ifdef HAVE_MMX | 90 #ifdef HAVE_MMX |
92 const uint8_t *mm_end; | 91 const uint8_t *mm_end; |
93 #endif | 92 #endif |
94 end = s + src_size; | 93 end = s + src_size; |
95 #ifdef HAVE_MMX | 94 #ifdef HAVE_MMX |
96 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 95 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
97 mm_end = end - 23; | 96 mm_end = end - 23; |
98 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); | 97 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); |
99 while(s < mm_end) | 98 while (s < mm_end) |
100 { | 99 { |
101 __asm __volatile( | 100 __asm __volatile( |
102 PREFETCH" 32%1\n\t" | 101 PREFETCH" 32%1 \n\t" |
103 "movd %1, %%mm0\n\t" | 102 "movd %1, %%mm0 \n\t" |
104 "punpckldq 3%1, %%mm0\n\t" | 103 "punpckldq 3%1, %%mm0 \n\t" |
105 "movd 6%1, %%mm1\n\t" | 104 "movd 6%1, %%mm1 \n\t" |
106 "punpckldq 9%1, %%mm1\n\t" | 105 "punpckldq 9%1, %%mm1 \n\t" |
107 "movd 12%1, %%mm2\n\t" | 106 "movd 12%1, %%mm2 \n\t" |
108 "punpckldq 15%1, %%mm2\n\t" | 107 "punpckldq 15%1, %%mm2 \n\t" |
109 "movd 18%1, %%mm3\n\t" | 108 "movd 18%1, %%mm3 \n\t" |
110 "punpckldq 21%1, %%mm3\n\t" | 109 "punpckldq 21%1, %%mm3 \n\t" |
111 "pand %%mm7, %%mm0\n\t" | 110 "pand %%mm7, %%mm0 \n\t" |
112 "pand %%mm7, %%mm1\n\t" | 111 "pand %%mm7, %%mm1 \n\t" |
113 "pand %%mm7, %%mm2\n\t" | 112 "pand %%mm7, %%mm2 \n\t" |
114 "pand %%mm7, %%mm3\n\t" | 113 "pand %%mm7, %%mm3 \n\t" |
115 MOVNTQ" %%mm0, %0\n\t" | 114 MOVNTQ" %%mm0, %0 \n\t" |
116 MOVNTQ" %%mm1, 8%0\n\t" | 115 MOVNTQ" %%mm1, 8%0 \n\t" |
117 MOVNTQ" %%mm2, 16%0\n\t" | 116 MOVNTQ" %%mm2, 16%0 \n\t" |
118 MOVNTQ" %%mm3, 24%0" | 117 MOVNTQ" %%mm3, 24%0" |
119 :"=m"(*dest) | 118 :"=m"(*dest) |
120 :"m"(*s) | 119 :"m"(*s) |
121 :"memory"); | 120 :"memory"); |
122 dest += 32; | 121 dest += 32; |
123 s += 24; | 122 s += 24; |
124 } | 123 } |
125 __asm __volatile(SFENCE:::"memory"); | 124 __asm __volatile(SFENCE:::"memory"); |
126 __asm __volatile(EMMS:::"memory"); | 125 __asm __volatile(EMMS:::"memory"); |
127 #endif | 126 #endif |
128 while(s < end) | 127 while (s < end) |
129 { | 128 { |
| 129 #ifdef WORDS_BIGENDIAN |
| 130 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ |
| 131 *dest++ = 0; |
| 132 *dest++ = s[2]; |
| 133 *dest++ = s[1]; |
| 134 *dest++ = s[0]; |
| 135 s+=3; |
| 136 #else |
| 137 *dest++ = *s++; |
| 138 *dest++ = *s++; |
| 139 *dest++ = *s++; |
| 140 *dest++ = 0; |
| 141 #endif |
| 142 } |
| 143 } |
| 144 |
| 145 static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size) |
| 146 { |
| 147 uint8_t *dest = dst; |
| 148 const uint8_t *s = src; |
| 149 const uint8_t *end; |
| 150 #ifdef HAVE_MMX |
| 151 const uint8_t *mm_end; |
| 152 #endif |
| 153 end = s + src_size; |
| 154 #ifdef HAVE_MMX |
| 155 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
| 156 mm_end = end - 31; |
| 157 while (s < mm_end) |
| 158 { |
| 159 __asm __volatile( |
| 160 PREFETCH" 32%1 \n\t" |
| 161 "movq %1, %%mm0 \n\t" |
| 162 "movq 8%1, %%mm1 \n\t" |
| 163 "movq 16%1, %%mm4 \n\t" |
| 164 "movq 24%1, %%mm5 \n\t" |
| 165 "movq %%mm0, %%mm2 \n\t" |
| 166 "movq %%mm1, %%mm3 \n\t" |
| 167 "movq %%mm4, %%mm6 \n\t" |
| 168 "movq %%mm5, %%mm7 \n\t" |
| 169 "psrlq $8, %%mm2 \n\t" |
| 170 "psrlq $8, %%mm3 \n\t" |
| 171 "psrlq $8, %%mm6 \n\t" |
| 172 "psrlq $8, %%mm7 \n\t" |
| 173 "pand %2, %%mm0 \n\t" |
| 174 "pand %2, %%mm1 \n\t" |
| 175 "pand %2, %%mm4 \n\t" |
| 176 "pand %2, %%mm5 \n\t" |
| 177 "pand %3, %%mm2 \n\t" |
| 178 "pand %3, %%mm3 \n\t" |
| 179 "pand %3, %%mm6 \n\t" |
| 180 "pand %3, %%mm7 \n\t" |
| 181 "por %%mm2, %%mm0 \n\t" |
| 182 "por %%mm3, %%mm1 \n\t" |
| 183 "por %%mm6, %%mm4 \n\t" |
| 184 "por %%mm7, %%mm5 \n\t" |
| 185 |
| 186 "movq %%mm1, %%mm2 \n\t" |
| 187 "movq %%mm4, %%mm3 \n\t" |
| 188 "psllq $48, %%mm2 \n\t" |
| 189 "psllq $32, %%mm3 \n\t" |
| 190 "pand %4, %%mm2 \n\t" |
| 191 "pand %5, %%mm3 \n\t" |
| 192 "por %%mm2, %%mm0 \n\t" |
| 193 "psrlq $16, %%mm1 \n\t" |
| 194 "psrlq $32, %%mm4 \n\t" |
| 195 "psllq $16, %%mm5 \n\t" |
| 196 "por %%mm3, %%mm1 \n\t" |
| 197 "pand %6, %%mm5 \n\t" |
| 198 "por %%mm5, %%mm4 \n\t" |
| 199 |
| 200 MOVNTQ" %%mm0, %0 \n\t" |
| 201 MOVNTQ" %%mm1, 8%0 \n\t" |
| 202 MOVNTQ" %%mm4, 16%0" |
| 203 :"=m"(*dest) |
| 204 :"m"(*s),"m"(mask24l), |
| 205 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) |
| 206 :"memory"); |
| 207 dest += 24; |
| 208 s += 32; |
| 209 } |
| 210 __asm __volatile(SFENCE:::"memory"); |
| 211 __asm __volatile(EMMS:::"memory"); |
| 212 #endif |
| 213 while (s < end) |
| 214 { |
130 #ifdef WORDS_BIGENDIAN | 215 #ifdef WORDS_BIGENDIAN |
131 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ | 216 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ |
132 *dest++ = 0; | 217 s++; |
133 *dest++ = s[2]; | 218 dest[2] = *s++; |
134 *dest++ = s[1]; | 219 dest[1] = *s++; |
135 *dest++ = s[0]; | 220 dest[0] = *s++; |
136 s+=3; | 221 dest += 3; |
137 #else | 222 #else |
138 *dest++ = *s++; | 223 *dest++ = *s++; |
139 *dest++ = *s++; | 224 *dest++ = *s++; |
140 *dest++ = *s++; | 225 *dest++ = *s++; |
141 *dest++ = 0; | 226 s++; |
142 #endif | 227 #endif |
143 } | 228 } |
144 } | |
145 | |
146 static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size) | |
147 { | |
148 uint8_t *dest = dst; | |
149 const uint8_t *s = src; | |
150 const uint8_t *end; | |
151 #ifdef HAVE_MMX | |
152 const uint8_t *mm_end; | |
153 #endif | |
154 end = s + src_size; | |
155 #ifdef HAVE_MMX | |
156 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
157 mm_end = end - 31; | |
158 while(s < mm_end) | |
159 { | |
160 __asm __volatile( | |
161 PREFETCH" 32%1\n\t" | |
162 "movq %1, %%mm0\n\t" | |
163 "movq 8%1, %%mm1\n\t" | |
164 "movq 16%1, %%mm4\n\t" | |
165 "movq 24%1, %%mm5\n\t" | |
166 "movq %%mm0, %%mm2\n\t" | |
167 "movq %%mm1, %%mm3\n\t" | |
168 "movq %%mm4, %%mm6\n\t" | |
169 "movq %%mm5, %%mm7\n\t" | |
170 "psrlq $8, %%mm2\n\t" | |
171 "psrlq $8, %%mm3\n\t" | |
172 "psrlq $8, %%mm6\n\t" | |
173 "psrlq $8, %%mm7\n\t" | |
174 "pand %2, %%mm0\n\t" | |
175 "pand %2, %%mm1\n\t" | |
176 "pand %2, %%mm4\n\t" | |
177 "pand %2, %%mm5\n\t" | |
178 "pand %3, %%mm2\n\t" | |
179 "pand %3, %%mm3\n\t" | |
180 "pand %3, %%mm6\n\t" | |
181 "pand %3, %%mm7\n\t" | |
182 "por %%mm2, %%mm0\n\t" | |
183 "por %%mm3, %%mm1\n\t" | |
184 "por %%mm6, %%mm4\n\t" | |
185 "por %%mm7, %%mm5\n\t" | |
186 | |
187 "movq %%mm1, %%mm2\n\t" | |
188 "movq %%mm4, %%mm3\n\t" | |
189 "psllq $48, %%mm2\n\t" | |
190 "psllq $32, %%mm3\n\t" | |
191 "pand %4, %%mm2\n\t" | |
192 "pand %5, %%mm3\n\t" | |
193 "por %%mm2, %%mm0\n\t" | |
194 "psrlq $16, %%mm1\n\t" | |
195 "psrlq $32, %%mm4\n\t" | |
196 "psllq $16, %%mm5\n\t" | |
197 "por %%mm3, %%mm1\n\t" | |
198 "pand %6, %%mm5\n\t" | |
199 "por %%mm5, %%mm4\n\t" | |
200 | |
201 MOVNTQ" %%mm0, %0\n\t" | |
202 MOVNTQ" %%mm1, 8%0\n\t" | |
203 MOVNTQ" %%mm4, 16%0" | |
204 :"=m"(*dest) | |
205 :"m"(*s),"m"(mask24l), | |
206 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
207 :"memory"); | |
208 dest += 24; | |
209 s += 32; | |
210 } | |
211 __asm __volatile(SFENCE:::"memory"); | |
212 __asm __volatile(EMMS:::"memory"); | |
213 #endif | |
214 while(s < end) | |
215 { | |
216 #ifdef WORDS_BIGENDIAN | |
217 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ | |
218 s++; | |
219 dest[2] = *s++; | |
220 dest[1] = *s++; | |
221 dest[0] = *s++; | |
222 dest += 3; | |
223 #else | |
224 *dest++ = *s++; | |
225 *dest++ = *s++; | |
226 *dest++ = *s++; | |
227 s++; | |
228 #endif | |
229 } | |
230 } | 229 } |
231 | 230 |
232 /* | 231 /* |
233 Original by Strepto/Astral | 232 Original by Strepto/Astral |
234 ported to gcc & bugfixed : A'rpi | 233 ported to gcc & bugfixed : A'rpi |
235 MMX2, 3DNOW optimization by Nick Kurshev | 234 MMX2, 3DNOW optimization by Nick Kurshev |
236 32bit c version, and and&add trick by Michael Niedermayer | 235 32bit c version, and and&add trick by Michael Niedermayer |
237 */ | 236 */ |
238 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size) | 237 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size) |
239 { | 238 { |
240 register const uint8_t* s=src; | 239 register const uint8_t* s=src; |
241 register uint8_t* d=dst; | 240 register uint8_t* d=dst; |
242 register const uint8_t *end; | 241 register const uint8_t *end; |
243 const uint8_t *mm_end; | 242 const uint8_t *mm_end; |
244 end = s + src_size; | 243 end = s + src_size; |
245 #ifdef HAVE_MMX | 244 #ifdef HAVE_MMX |
246 __asm __volatile(PREFETCH" %0"::"m"(*s)); | 245 __asm __volatile(PREFETCH" %0"::"m"(*s)); |
247 __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); | 246 __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); |
248 mm_end = end - 15; | 247 mm_end = end - 15; |
249 while(s<mm_end) | 248 while (s<mm_end) |
250 { | 249 { |
251 __asm __volatile( | 250 __asm __volatile( |
252 PREFETCH" 32%1\n\t" | 251 PREFETCH" 32%1 \n\t" |
253 "movq %1, %%mm0\n\t" | 252 "movq %1, %%mm0 \n\t" |
254 "movq 8%1, %%mm2\n\t" | 253 "movq 8%1, %%mm2 \n\t" |
255 "movq %%mm0, %%mm1\n\t" | 254 "movq %%mm0, %%mm1 \n\t" |
256 "movq %%mm2, %%mm3\n\t" | 255 "movq %%mm2, %%mm3 \n\t" |
257 "pand %%mm4, %%mm0\n\t" | 256 "pand %%mm4, %%mm0 \n\t" |
258 "pand %%mm4, %%mm2\n\t" | 257 "pand %%mm4, %%mm2 \n\t" |
259 "paddw %%mm1, %%mm0\n\t" | 258 "paddw %%mm1, %%mm0 \n\t" |
260 "paddw %%mm3, %%mm2\n\t" | 259 "paddw %%mm3, %%mm2 \n\t" |
261 MOVNTQ" %%mm0, %0\n\t" | 260 MOVNTQ" %%mm0, %0 \n\t" |
262 MOVNTQ" %%mm2, 8%0" | 261 MOVNTQ" %%mm2, 8%0" |
263 :"=m"(*d) | 262 :"=m"(*d) |
264 :"m"(*s) | 263 :"m"(*s) |
265 ); | 264 ); |
266 d+=16; | 265 d+=16; |
267 s+=16; | 266 s+=16; |
268 } | 267 } |
269 __asm __volatile(SFENCE:::"memory"); | 268 __asm __volatile(SFENCE:::"memory"); |
270 __asm __volatile(EMMS:::"memory"); | 269 __asm __volatile(EMMS:::"memory"); |
271 #endif | 270 #endif |
272 mm_end = end - 3; | 271 mm_end = end - 3; |
273 while(s < mm_end) | 272 while (s < mm_end) |
274 { | 273 { |
275 register unsigned x= *((uint32_t *)s); | 274 register unsigned x= *((uint32_t *)s); |
276 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); | 275 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); |
277 d+=4; | 276 d+=4; |
278 s+=4; | 277 s+=4; |
279 } | 278 } |
280 if(s < end) | 279 if (s < end) |
281 { | 280 { |
282 register unsigned short x= *((uint16_t *)s); | 281 register unsigned short x= *((uint16_t *)s); |
283 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); | 282 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); |
284 } | 283 } |
285 } | 284 } |
286 | 285 |
287 static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size) | 286 static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size) |
288 { | 287 { |
289 register const uint8_t* s=src; | 288 register const uint8_t* s=src; |
290 register uint8_t* d=dst; | 289 register uint8_t* d=dst; |
291 register const uint8_t *end; | 290 register const uint8_t *end; |
292 const uint8_t *mm_end; | 291 const uint8_t *mm_end; |
293 end = s + src_size; | 292 end = s + src_size; |
294 #ifdef HAVE_MMX | 293 #ifdef HAVE_MMX |
295 __asm __volatile(PREFETCH" %0"::"m"(*s)); | 294 __asm __volatile(PREFETCH" %0"::"m"(*s)); |
296 __asm __volatile("movq %0, %%mm7"::"m"(mask15rg)); | 295 __asm __volatile("movq %0, %%mm7"::"m"(mask15rg)); |
297 __asm __volatile("movq %0, %%mm6"::"m"(mask15b)); | 296 __asm __volatile("movq %0, %%mm6"::"m"(mask15b)); |
298 mm_end = end - 15; | 297 mm_end = end - 15; |
299 while(s<mm_end) | 298 while (s<mm_end) |
300 { | 299 { |
301 __asm __volatile( | 300 __asm __volatile( |
302 PREFETCH" 32%1\n\t" | 301 PREFETCH" 32%1 \n\t" |
303 "movq %1, %%mm0\n\t" | 302 "movq %1, %%mm0 \n\t" |
304 "movq 8%1, %%mm2\n\t" | 303 "movq 8%1, %%mm2 \n\t" |
305 "movq %%mm0, %%mm1\n\t" | 304 "movq %%mm0, %%mm1 \n\t" |
306 "movq %%mm2, %%mm3\n\t" | 305 "movq %%mm2, %%mm3 \n\t" |
307 "psrlq $1, %%mm0\n\t" | 306 "psrlq $1, %%mm0 \n\t" |
308 "psrlq $1, %%mm2\n\t" | 307 "psrlq $1, %%mm2 \n\t" |
309 "pand %%mm7, %%mm0\n\t" | 308 "pand %%mm7, %%mm0 \n\t" |
310 "pand %%mm7, %%mm2\n\t" | 309 "pand %%mm7, %%mm2 \n\t" |
311 "pand %%mm6, %%mm1\n\t" | 310 "pand %%mm6, %%mm1 \n\t" |
312 "pand %%mm6, %%mm3\n\t" | 311 "pand %%mm6, %%mm3 \n\t" |
313 "por %%mm1, %%mm0\n\t" | 312 "por %%mm1, %%mm0 \n\t" |
314 "por %%mm3, %%mm2\n\t" | 313 "por %%mm3, %%mm2 \n\t" |
315 MOVNTQ" %%mm0, %0\n\t" | 314 MOVNTQ" %%mm0, %0 \n\t" |
316 MOVNTQ" %%mm2, 8%0" | 315 MOVNTQ" %%mm2, 8%0" |
317 :"=m"(*d) | 316 :"=m"(*d) |
318 :"m"(*s) | 317 :"m"(*s) |
319 ); | 318 ); |
320 d+=16; | 319 d+=16; |
321 s+=16; | 320 s+=16; |
322 } | 321 } |
323 __asm __volatile(SFENCE:::"memory"); | 322 __asm __volatile(SFENCE:::"memory"); |
324 __asm __volatile(EMMS:::"memory"); | 323 __asm __volatile(EMMS:::"memory"); |
325 #endif | 324 #endif |
326 mm_end = end - 3; | 325 mm_end = end - 3; |
327 while(s < mm_end) | 326 while (s < mm_end) |
328 { | 327 { |
329 register uint32_t x= *((uint32_t *)s); | 328 register uint32_t x= *((uint32_t *)s); |
330 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); | 329 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); |
331 s+=4; | 330 s+=4; |
332 d+=4; | 331 d+=4; |
333 } | 332 } |
334 if(s < end) | 333 if (s < end) |
335 { | 334 { |
336 register uint16_t x= *((uint16_t *)s); | 335 register uint16_t x= *((uint16_t *)s); |
337 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); | 336 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); |
338 s+=2; | 337 s+=2; |
339 d+=2; | 338 d+=2; |
340 } | 339 } |
341 } | 340 } |
342 | 341 |
343 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) | 342 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) |
344 { | 343 { |
345 const uint8_t *s = src; | 344 const uint8_t *s = src; |
346 const uint8_t *end; | 345 const uint8_t *end; |
347 #ifdef HAVE_MMX | 346 #ifdef HAVE_MMX |
348 const uint8_t *mm_end; | 347 const uint8_t *mm_end; |
349 #endif | 348 #endif |
350 uint16_t *d = (uint16_t *)dst; | 349 uint16_t *d = (uint16_t *)dst; |
351 end = s + src_size; | 350 end = s + src_size; |
352 #ifdef HAVE_MMX | 351 #ifdef HAVE_MMX |
353 mm_end = end - 15; | 352 mm_end = end - 15; |
354 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) | 353 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) |
355 asm volatile( | 354 asm volatile( |
356 "movq %3, %%mm5 \n\t" | 355 "movq %3, %%mm5 \n\t" |
357 "movq %4, %%mm6 \n\t" | 356 "movq %4, %%mm6 \n\t" |
358 "movq %5, %%mm7 \n\t" | 357 "movq %5, %%mm7 \n\t" |
359 "jmp 2f \n\t" | 358 "jmp 2f \n\t" |
360 ASMALIGN(4) | 359 ASMALIGN(4) |
361 "1: \n\t" | 360 "1: \n\t" |
362 PREFETCH" 32(%1) \n\t" | 361 PREFETCH" 32(%1) \n\t" |
363 "movd (%1), %%mm0 \n\t" | 362 "movd (%1), %%mm0 \n\t" |
364 "movd 4(%1), %%mm3 \n\t" | 363 "movd 4(%1), %%mm3 \n\t" |
365 "punpckldq 8(%1), %%mm0 \n\t" | 364 "punpckldq 8(%1), %%mm0 \n\t" |
366 "punpckldq 12(%1), %%mm3 \n\t" | 365 "punpckldq 12(%1), %%mm3 \n\t" |
367 "movq %%mm0, %%mm1 \n\t" | 366 "movq %%mm0, %%mm1 \n\t" |
368 "movq %%mm3, %%mm4 \n\t" | 367 "movq %%mm3, %%mm4 \n\t" |
369 "pand %%mm6, %%mm0 \n\t" | 368 "pand %%mm6, %%mm0 \n\t" |
370 "pand %%mm6, %%mm3 \n\t" | 369 "pand %%mm6, %%mm3 \n\t" |
371 "pmaddwd %%mm7, %%mm0 \n\t" | 370 "pmaddwd %%mm7, %%mm0 \n\t" |
372 "pmaddwd %%mm7, %%mm3 \n\t" | 371 "pmaddwd %%mm7, %%mm3 \n\t" |
373 "pand %%mm5, %%mm1 \n\t" | 372 "pand %%mm5, %%mm1 \n\t" |
374 "pand %%mm5, %%mm4 \n\t" | 373 "pand %%mm5, %%mm4 \n\t" |
375 "por %%mm1, %%mm0 \n\t" | 374 "por %%mm1, %%mm0 \n\t" |
376 "por %%mm4, %%mm3 \n\t" | 375 "por %%mm4, %%mm3 \n\t" |
377 "psrld $5, %%mm0 \n\t" | 376 "psrld $5, %%mm0 \n\t" |
378 "pslld $11, %%mm3 \n\t" | 377 "pslld $11, %%mm3 \n\t" |
379 "por %%mm3, %%mm0 \n\t" | 378 "por %%mm3, %%mm0 \n\t" |
380 MOVNTQ" %%mm0, (%0) \n\t" | 379 MOVNTQ" %%mm0, (%0) \n\t" |
381 "add $16, %1 \n\t" | 380 "add $16, %1 \n\t" |
382 "add $8, %0 \n\t" | 381 "add $8, %0 \n\t" |
383 "2: \n\t" | 382 "2: \n\t" |
384 "cmp %2, %1 \n\t" | 383 "cmp %2, %1 \n\t" |
385 " jb 1b \n\t" | 384 " jb 1b \n\t" |
386 : "+r" (d), "+r"(s) | 385 : "+r" (d), "+r"(s) |
387 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) | 386 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) |
388 ); | 387 ); |
389 #else | 388 #else |
390 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 389 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
391 __asm __volatile( | 390 __asm __volatile( |
392 "movq %0, %%mm7\n\t" | 391 "movq %0, %%mm7 \n\t" |
393 "movq %1, %%mm6\n\t" | 392 "movq %1, %%mm6 \n\t" |
394 ::"m"(red_16mask),"m"(green_16mask)); | 393 ::"m"(red_16mask),"m"(green_16mask)); |
395 while(s < mm_end) | 394 while (s < mm_end) |
396 { | 395 { |
397 __asm __volatile( | 396 __asm __volatile( |
398 PREFETCH" 32%1\n\t" | 397 PREFETCH" 32%1 \n\t" |
399 "movd %1, %%mm0\n\t" | 398 "movd %1, %%mm0 \n\t" |
400 "movd 4%1, %%mm3\n\t" | 399 "movd 4%1, %%mm3 \n\t" |
401 "punpckldq 8%1, %%mm0\n\t" | 400 "punpckldq 8%1, %%mm0 \n\t" |
402 "punpckldq 12%1, %%mm3\n\t" | 401 "punpckldq 12%1, %%mm3 \n\t" |
403 "movq %%mm0, %%mm1\n\t" | 402 "movq %%mm0, %%mm1 \n\t" |
404 "movq %%mm0, %%mm2\n\t" | 403 "movq %%mm0, %%mm2 \n\t" |
405 "movq %%mm3, %%mm4\n\t" | 404 "movq %%mm3, %%mm4 \n\t" |
406 "movq %%mm3, %%mm5\n\t" | 405 "movq %%mm3, %%mm5 \n\t" |
407 "psrlq $3, %%mm0\n\t" | 406 "psrlq $3, %%mm0 \n\t" |
408 "psrlq $3, %%mm3\n\t" | 407 "psrlq $3, %%mm3 \n\t" |
409 "pand %2, %%mm0\n\t" | 408 "pand %2, %%mm0 \n\t" |
410 "pand %2, %%mm3\n\t" | 409 "pand %2, %%mm3 \n\t" |
411 "psrlq $5, %%mm1\n\t" | 410 "psrlq $5, %%mm1 \n\t" |
412 "psrlq $5, %%mm4\n\t" | 411 "psrlq $5, %%mm4 \n\t" |
413 "pand %%mm6, %%mm1\n\t" | 412 "pand %%mm6, %%mm1 \n\t" |
414 "pand %%mm6, %%mm4\n\t" | 413 "pand %%mm6, %%mm4 \n\t" |
415 "psrlq $8, %%mm2\n\t" | 414 "psrlq $8, %%mm2 \n\t" |
416 "psrlq $8, %%mm5\n\t" | 415 "psrlq $8, %%mm5 \n\t" |
417 "pand %%mm7, %%mm2\n\t" | 416 "pand %%mm7, %%mm2 \n\t" |
418 "pand %%mm7, %%mm5\n\t" | 417 "pand %%mm7, %%mm5 \n\t" |
419 "por %%mm1, %%mm0\n\t" | 418 "por %%mm1, %%mm0 \n\t" |
420 "por %%mm4, %%mm3\n\t" | 419 "por %%mm4, %%mm3 \n\t" |
421 "por %%mm2, %%mm0\n\t" | 420 "por %%mm2, %%mm0 \n\t" |
422 "por %%mm5, %%mm3\n\t" | 421 "por %%mm5, %%mm3 \n\t" |
423 "psllq $16, %%mm3\n\t" | 422 "psllq $16, %%mm3 \n\t" |
424 "por %%mm3, %%mm0\n\t" | 423 "por %%mm3, %%mm0 \n\t" |
425 MOVNTQ" %%mm0, %0\n\t" | 424 MOVNTQ" %%mm0, %0 \n\t" |
426 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 425 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
427 d += 4; | 426 d += 4; |
428 s += 16; | 427 s += 16; |
429 } | 428 } |
430 #endif | 429 #endif |
431 __asm __volatile(SFENCE:::"memory"); | 430 __asm __volatile(SFENCE:::"memory"); |
432 __asm __volatile(EMMS:::"memory"); | 431 __asm __volatile(EMMS:::"memory"); |
433 #endif | 432 #endif |
434 while(s < end) | 433 while (s < end) |
435 { | 434 { |
436 register int rgb = *(uint32_t*)s; s += 4; | 435 register int rgb = *(uint32_t*)s; s += 4; |
437 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); | 436 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); |
438 } | 437 } |
439 } | 438 } |
440 | 439 |
441 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) | 440 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) |
442 { | 441 { |
443 const uint8_t *s = src; | 442 const uint8_t *s = src; |
444 const uint8_t *end; | 443 const uint8_t *end; |
445 #ifdef HAVE_MMX | 444 #ifdef HAVE_MMX |
446 const uint8_t *mm_end; | 445 const uint8_t *mm_end; |
447 #endif | 446 #endif |
448 uint16_t *d = (uint16_t *)dst; | 447 uint16_t *d = (uint16_t *)dst; |
449 end = s + src_size; | 448 end = s + src_size; |
450 #ifdef HAVE_MMX | 449 #ifdef HAVE_MMX |
451 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 450 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
452 __asm __volatile( | 451 __asm __volatile( |
453 "movq %0, %%mm7\n\t" | 452 "movq %0, %%mm7 \n\t" |
454 "movq %1, %%mm6\n\t" | 453 "movq %1, %%mm6 \n\t" |
455 ::"m"(red_16mask),"m"(green_16mask)); | 454 ::"m"(red_16mask),"m"(green_16mask)); |
456 mm_end = end - 15; | 455 mm_end = end - 15; |
457 while(s < mm_end) | 456 while (s < mm_end) |
458 { | 457 { |
459 __asm __volatile( | 458 __asm __volatile( |
460 PREFETCH" 32%1\n\t" | 459 PREFETCH" 32%1 \n\t" |
461 "movd %1, %%mm0\n\t" | 460 "movd %1, %%mm0 \n\t" |
462 "movd 4%1, %%mm3\n\t" | 461 "movd 4%1, %%mm3 \n\t" |
463 "punpckldq 8%1, %%mm0\n\t" | 462 "punpckldq 8%1, %%mm0 \n\t" |
464 "punpckldq 12%1, %%mm3\n\t" | 463 "punpckldq 12%1, %%mm3 \n\t" |
465 "movq %%mm0, %%mm1\n\t" | 464 "movq %%mm0, %%mm1 \n\t" |
466 "movq %%mm0, %%mm2\n\t" | 465 "movq %%mm0, %%mm2 \n\t" |
467 "movq %%mm3, %%mm4\n\t" | 466 "movq %%mm3, %%mm4 \n\t" |
468 "movq %%mm3, %%mm5\n\t" | 467 "movq %%mm3, %%mm5 \n\t" |
469 "psllq $8, %%mm0\n\t" | 468 "psllq $8, %%mm0 \n\t" |
470 "psllq $8, %%mm3\n\t" | 469 "psllq $8, %%mm3 \n\t" |
471 "pand %%mm7, %%mm0\n\t" | 470 "pand %%mm7, %%mm0 \n\t" |
472 "pand %%mm7, %%mm3\n\t" | 471 "pand %%mm7, %%mm3 \n\t" |
473 "psrlq $5, %%mm1\n\t" | 472 "psrlq $5, %%mm1 \n\t" |
474 "psrlq $5, %%mm4\n\t" | 473 "psrlq $5, %%mm4 \n\t" |
475 "pand %%mm6, %%mm1\n\t" | 474 "pand %%mm6, %%mm1 \n\t" |
476 "pand %%mm6, %%mm4\n\t" | 475 "pand %%mm6, %%mm4 \n\t" |
477 "psrlq $19, %%mm2\n\t" | 476 "psrlq $19, %%mm2 \n\t" |
478 "psrlq $19, %%mm5\n\t" | 477 "psrlq $19, %%mm5 \n\t" |
479 "pand %2, %%mm2\n\t" | 478 "pand %2, %%mm2 \n\t" |
480 "pand %2, %%mm5\n\t" | 479 "pand %2, %%mm5 \n\t" |
481 "por %%mm1, %%mm0\n\t" | 480 "por %%mm1, %%mm0 \n\t" |
482 "por %%mm4, %%mm3\n\t" | 481 "por %%mm4, %%mm3 \n\t" |
483 "por %%mm2, %%mm0\n\t" | 482 "por %%mm2, %%mm0 \n\t" |
484 "por %%mm5, %%mm3\n\t" | 483 "por %%mm5, %%mm3 \n\t" |
485 "psllq $16, %%mm3\n\t" | 484 "psllq $16, %%mm3 \n\t" |
486 "por %%mm3, %%mm0\n\t" | 485 "por %%mm3, %%mm0 \n\t" |
487 MOVNTQ" %%mm0, %0\n\t" | 486 MOVNTQ" %%mm0, %0 \n\t" |
488 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 487 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
489 d += 4; | 488 d += 4; |
490 s += 16; | 489 s += 16; |
491 } | 490 } |
492 __asm __volatile(SFENCE:::"memory"); | 491 __asm __volatile(SFENCE:::"memory"); |
493 __asm __volatile(EMMS:::"memory"); | 492 __asm __volatile(EMMS:::"memory"); |
494 #endif | 493 #endif |
495 while(s < end) | 494 while (s < end) |
496 { | 495 { |
497 register int rgb = *(uint32_t*)s; s += 4; | 496 register int rgb = *(uint32_t*)s; s += 4; |
498 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); | 497 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); |
499 } | 498 } |
500 } | 499 } |
501 | 500 |
502 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) | 501 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) |
503 { | 502 { |
504 const uint8_t *s = src; | 503 const uint8_t *s = src; |
505 const uint8_t *end; | 504 const uint8_t *end; |
506 #ifdef HAVE_MMX | 505 #ifdef HAVE_MMX |
507 const uint8_t *mm_end; | 506 const uint8_t *mm_end; |
508 #endif | 507 #endif |
509 uint16_t *d = (uint16_t *)dst; | 508 uint16_t *d = (uint16_t *)dst; |
510 end = s + src_size; | 509 end = s + src_size; |
511 #ifdef HAVE_MMX | 510 #ifdef HAVE_MMX |
512 mm_end = end - 15; | 511 mm_end = end - 15; |
513 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) | 512 #if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) |
514 asm volatile( | 513 asm volatile( |
515 "movq %3, %%mm5 \n\t" | 514 "movq %3, %%mm5 \n\t" |
516 "movq %4, %%mm6 \n\t" | 515 "movq %4, %%mm6 \n\t" |
517 "movq %5, %%mm7 \n\t" | 516 "movq %5, %%mm7 \n\t" |
518 "jmp 2f \n\t" | 517 "jmp 2f \n\t" |
519 ASMALIGN(4) | 518 ASMALIGN(4) |
520 "1: \n\t" | 519 "1: \n\t" |
521 PREFETCH" 32(%1) \n\t" | 520 PREFETCH" 32(%1) \n\t" |
522 "movd (%1), %%mm0 \n\t" | 521 "movd (%1), %%mm0 \n\t" |
523 "movd 4(%1), %%mm3 \n\t" | 522 "movd 4(%1), %%mm3 \n\t" |
524 "punpckldq 8(%1), %%mm0 \n\t" | 523 "punpckldq 8(%1), %%mm0 \n\t" |
525 "punpckldq 12(%1), %%mm3 \n\t" | 524 "punpckldq 12(%1), %%mm3 \n\t" |
526 "movq %%mm0, %%mm1 \n\t" | 525 "movq %%mm0, %%mm1 \n\t" |
527 "movq %%mm3, %%mm4 \n\t" | 526 "movq %%mm3, %%mm4 \n\t" |
528 "pand %%mm6, %%mm0 \n\t" | 527 "pand %%mm6, %%mm0 \n\t" |
529 "pand %%mm6, %%mm3 \n\t" | 528 "pand %%mm6, %%mm3 \n\t" |
530 "pmaddwd %%mm7, %%mm0 \n\t" | 529 "pmaddwd %%mm7, %%mm0 \n\t" |
531 "pmaddwd %%mm7, %%mm3 \n\t" | 530 "pmaddwd %%mm7, %%mm3 \n\t" |
532 "pand %%mm5, %%mm1 \n\t" | 531 "pand %%mm5, %%mm1 \n\t" |
533 "pand %%mm5, %%mm4 \n\t" | 532 "pand %%mm5, %%mm4 \n\t" |
534 "por %%mm1, %%mm0 \n\t" | 533 "por %%mm1, %%mm0 \n\t" |
535 "por %%mm4, %%mm3 \n\t" | 534 "por %%mm4, %%mm3 \n\t" |
536 "psrld $6, %%mm0 \n\t" | 535 "psrld $6, %%mm0 \n\t" |
537 "pslld $10, %%mm3 \n\t" | 536 "pslld $10, %%mm3 \n\t" |
538 "por %%mm3, %%mm0 \n\t" | 537 "por %%mm3, %%mm0 \n\t" |
539 MOVNTQ" %%mm0, (%0) \n\t" | 538 MOVNTQ" %%mm0, (%0) \n\t" |
540 "add $16, %1 \n\t" | 539 "add $16, %1 \n\t" |
541 "add $8, %0 \n\t" | 540 "add $8, %0 \n\t" |
542 "2: \n\t" | 541 "2: \n\t" |
543 "cmp %2, %1 \n\t" | 542 "cmp %2, %1 \n\t" |
544 " jb 1b \n\t" | 543 " jb 1b \n\t" |
545 : "+r" (d), "+r"(s) | 544 : "+r" (d), "+r"(s) |
546 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) | 545 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) |
547 ); | 546 ); |
548 #else | 547 #else |
549 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 548 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
550 __asm __volatile( | 549 __asm __volatile( |
551 "movq %0, %%mm7\n\t" | 550 "movq %0, %%mm7 \n\t" |
552 "movq %1, %%mm6\n\t" | 551 "movq %1, %%mm6 \n\t" |
553 ::"m"(red_15mask),"m"(green_15mask)); | 552 ::"m"(red_15mask),"m"(green_15mask)); |
554 while(s < mm_end) | 553 while (s < mm_end) |
555 { | 554 { |
556 __asm __volatile( | 555 __asm __volatile( |
557 PREFETCH" 32%1\n\t" | 556 PREFETCH" 32%1 \n\t" |
558 "movd %1, %%mm0\n\t" | 557 "movd %1, %%mm0 \n\t" |
559 "movd 4%1, %%mm3\n\t" | 558 "movd 4%1, %%mm3 \n\t" |
560 "punpckldq 8%1, %%mm0\n\t" | 559 "punpckldq 8%1, %%mm0 \n\t" |
561 "punpckldq 12%1, %%mm3\n\t" | 560 "punpckldq 12%1, %%mm3 \n\t" |
562 "movq %%mm0, %%mm1\n\t" | 561 "movq %%mm0, %%mm1 \n\t" |
563 "movq %%mm0, %%mm2\n\t" | 562 "movq %%mm0, %%mm2 \n\t" |
564 "movq %%mm3, %%mm4\n\t" | 563 "movq %%mm3, %%mm4 \n\t" |
565 "movq %%mm3, %%mm5\n\t" | 564 "movq %%mm3, %%mm5 \n\t" |
566 "psrlq $3, %%mm0\n\t" | 565 "psrlq $3, %%mm0 \n\t" |
567 "psrlq $3, %%mm3\n\t" | 566 "psrlq $3, %%mm3 \n\t" |
568 "pand %2, %%mm0\n\t" | 567 "pand %2, %%mm0 \n\t" |
569 "pand %2, %%mm3\n\t" | 568 "pand %2, %%mm3 \n\t" |
570 "psrlq $6, %%mm1\n\t" | 569 "psrlq $6, %%mm1 \n\t" |
571 "psrlq $6, %%mm4\n\t" | 570 "psrlq $6, %%mm4 \n\t" |
572 "pand %%mm6, %%mm1\n\t" | 571 "pand %%mm6, %%mm1 \n\t" |
573 "pand %%mm6, %%mm4\n\t" | 572 "pand %%mm6, %%mm4 \n\t" |
574 "psrlq $9, %%mm2\n\t" | 573 "psrlq $9, %%mm2 \n\t" |
575 "psrlq $9, %%mm5\n\t" | 574 "psrlq $9, %%mm5 \n\t" |
576 "pand %%mm7, %%mm2\n\t" | 575 "pand %%mm7, %%mm2 \n\t" |
577 "pand %%mm7, %%mm5\n\t" | 576 "pand %%mm7, %%mm5 \n\t" |
578 "por %%mm1, %%mm0\n\t" | 577 "por %%mm1, %%mm0 \n\t" |
579 "por %%mm4, %%mm3\n\t" | 578 "por %%mm4, %%mm3 \n\t" |
580 "por %%mm2, %%mm0\n\t" | 579 "por %%mm2, %%mm0 \n\t" |
581 "por %%mm5, %%mm3\n\t" | 580 "por %%mm5, %%mm3 \n\t" |
582 "psllq $16, %%mm3\n\t" | 581 "psllq $16, %%mm3 \n\t" |
583 "por %%mm3, %%mm0\n\t" | 582 "por %%mm3, %%mm0 \n\t" |
584 MOVNTQ" %%mm0, %0\n\t" | 583 MOVNTQ" %%mm0, %0 \n\t" |
585 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 584 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
586 d += 4; | 585 d += 4; |
587 s += 16; | 586 s += 16; |
588 } | 587 } |
589 #endif | 588 #endif |
590 __asm __volatile(SFENCE:::"memory"); | 589 __asm __volatile(SFENCE:::"memory"); |
591 __asm __volatile(EMMS:::"memory"); | 590 __asm __volatile(EMMS:::"memory"); |
592 #endif | 591 #endif |
593 while(s < end) | 592 while (s < end) |
594 { | 593 { |
595 register int rgb = *(uint32_t*)s; s += 4; | 594 register int rgb = *(uint32_t*)s; s += 4; |
596 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); | 595 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); |
597 } | 596 } |
598 } | 597 } |
599 | 598 |
600 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) | 599 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) |
601 { | 600 { |
602 const uint8_t *s = src; | 601 const uint8_t *s = src; |
603 const uint8_t *end; | 602 const uint8_t *end; |
604 #ifdef HAVE_MMX | 603 #ifdef HAVE_MMX |
605 const uint8_t *mm_end; | 604 const uint8_t *mm_end; |
606 #endif | 605 #endif |
607 uint16_t *d = (uint16_t *)dst; | 606 uint16_t *d = (uint16_t *)dst; |
608 end = s + src_size; | 607 end = s + src_size; |
609 #ifdef HAVE_MMX | 608 #ifdef HAVE_MMX |
610 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 609 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
611 __asm __volatile( | 610 __asm __volatile( |
612 "movq %0, %%mm7\n\t" | 611 "movq %0, %%mm7 \n\t" |
613 "movq %1, %%mm6\n\t" | 612 "movq %1, %%mm6 \n\t" |
614 ::"m"(red_15mask),"m"(green_15mask)); | 613 ::"m"(red_15mask),"m"(green_15mask)); |
615 mm_end = end - 15; | 614 mm_end = end - 15; |
616 while(s < mm_end) | 615 while (s < mm_end) |
617 { | 616 { |
618 __asm __volatile( | 617 __asm __volatile( |
619 PREFETCH" 32%1\n\t" | 618 PREFETCH" 32%1 \n\t" |
620 "movd %1, %%mm0\n\t" | 619 "movd %1, %%mm0 \n\t" |
621 "movd 4%1, %%mm3\n\t" | 620 "movd 4%1, %%mm3 \n\t" |
622 "punpckldq 8%1, %%mm0\n\t" | 621 "punpckldq 8%1, %%mm0 \n\t" |
623 "punpckldq 12%1, %%mm3\n\t" | 622 "punpckldq 12%1, %%mm3 \n\t" |
624 "movq %%mm0, %%mm1\n\t" | 623 "movq %%mm0, %%mm1 \n\t" |
625 "movq %%mm0, %%mm2\n\t" | 624 "movq %%mm0, %%mm2 \n\t" |
626 "movq %%mm3, %%mm4\n\t" | 625 "movq %%mm3, %%mm4 \n\t" |
627 "movq %%mm3, %%mm5\n\t" | 626 "movq %%mm3, %%mm5 \n\t" |
628 "psllq $7, %%mm0\n\t" | 627 "psllq $7, %%mm0 \n\t" |
629 "psllq $7, %%mm3\n\t" | 628 "psllq $7, %%mm3 \n\t" |
630 "pand %%mm7, %%mm0\n\t" | 629 "pand %%mm7, %%mm0 \n\t" |
631 "pand %%mm7, %%mm3\n\t" | 630 "pand %%mm7, %%mm3 \n\t" |
632 "psrlq $6, %%mm1\n\t" | 631 "psrlq $6, %%mm1 \n\t" |
633 "psrlq $6, %%mm4\n\t" | 632 "psrlq $6, %%mm4 \n\t" |
634 "pand %%mm6, %%mm1\n\t" | 633 "pand %%mm6, %%mm1 \n\t" |
635 "pand %%mm6, %%mm4\n\t" | 634 "pand %%mm6, %%mm4 \n\t" |
636 "psrlq $19, %%mm2\n\t" | 635 "psrlq $19, %%mm2 \n\t" |
637 "psrlq $19, %%mm5\n\t" | 636 "psrlq $19, %%mm5 \n\t" |
638 "pand %2, %%mm2\n\t" | 637 "pand %2, %%mm2 \n\t" |
639 "pand %2, %%mm5\n\t" | 638 "pand %2, %%mm5 \n\t" |
640 "por %%mm1, %%mm0\n\t" | 639 "por %%mm1, %%mm0 \n\t" |
641 "por %%mm4, %%mm3\n\t" | 640 "por %%mm4, %%mm3 \n\t" |
642 "por %%mm2, %%mm0\n\t" | 641 "por %%mm2, %%mm0 \n\t" |
643 "por %%mm5, %%mm3\n\t" | 642 "por %%mm5, %%mm3 \n\t" |
644 "psllq $16, %%mm3\n\t" | 643 "psllq $16, %%mm3 \n\t" |
645 "por %%mm3, %%mm0\n\t" | 644 "por %%mm3, %%mm0 \n\t" |
646 MOVNTQ" %%mm0, %0\n\t" | 645 MOVNTQ" %%mm0, %0 \n\t" |
647 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 646 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
648 d += 4; | 647 d += 4; |
649 s += 16; | 648 s += 16; |
650 } | 649 } |
651 __asm __volatile(SFENCE:::"memory"); | 650 __asm __volatile(SFENCE:::"memory"); |
652 __asm __volatile(EMMS:::"memory"); | 651 __asm __volatile(EMMS:::"memory"); |
653 #endif | 652 #endif |
654 while(s < end) | 653 while (s < end) |
655 { | 654 { |
656 register int rgb = *(uint32_t*)s; s += 4; | 655 register int rgb = *(uint32_t*)s; s += 4; |
657 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); | 656 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); |
658 } | 657 } |
659 } | 658 } |
660 | 659 |
661 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) | 660 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) |
662 { | 661 { |
663 const uint8_t *s = src; | 662 const uint8_t *s = src; |
664 const uint8_t *end; | 663 const uint8_t *end; |
665 #ifdef HAVE_MMX | 664 #ifdef HAVE_MMX |
666 const uint8_t *mm_end; | 665 const uint8_t *mm_end; |
667 #endif | 666 #endif |
668 uint16_t *d = (uint16_t *)dst; | 667 uint16_t *d = (uint16_t *)dst; |
669 end = s + src_size; | 668 end = s + src_size; |
670 #ifdef HAVE_MMX | 669 #ifdef HAVE_MMX |
671 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 670 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
672 __asm __volatile( | 671 __asm __volatile( |
673 "movq %0, %%mm7\n\t" | 672 "movq %0, %%mm7 \n\t" |
674 "movq %1, %%mm6\n\t" | 673 "movq %1, %%mm6 \n\t" |
675 ::"m"(red_16mask),"m"(green_16mask)); | 674 ::"m"(red_16mask),"m"(green_16mask)); |
676 mm_end = end - 11; | 675 mm_end = end - 11; |
677 while(s < mm_end) | 676 while (s < mm_end) |
678 { | 677 { |
679 __asm __volatile( | 678 __asm __volatile( |
680 PREFETCH" 32%1\n\t" | 679 PREFETCH" 32%1 \n\t" |
681 "movd %1, %%mm0\n\t" | 680 "movd %1, %%mm0 \n\t" |
682 "movd 3%1, %%mm3\n\t" | 681 "movd 3%1, %%mm3 \n\t" |
683 "punpckldq 6%1, %%mm0\n\t" | 682 "punpckldq 6%1, %%mm0 \n\t" |
684 "punpckldq 9%1, %%mm3\n\t" | 683 "punpckldq 9%1, %%mm3 \n\t" |
685 "movq %%mm0, %%mm1\n\t" | 684 "movq %%mm0, %%mm1 \n\t" |
686 "movq %%mm0, %%mm2\n\t" | 685 "movq %%mm0, %%mm2 \n\t" |
687 "movq %%mm3, %%mm4\n\t" | 686 "movq %%mm3, %%mm4 \n\t" |
688 "movq %%mm3, %%mm5\n\t" | 687 "movq %%mm3, %%mm5 \n\t" |
689 "psrlq $3, %%mm0\n\t" | 688 "psrlq $3, %%mm0 \n\t" |
690 "psrlq $3, %%mm3\n\t" | 689 "psrlq $3, %%mm3 \n\t" |
691 "pand %2, %%mm0\n\t" | 690 "pand %2, %%mm0 \n\t" |
692 "pand %2, %%mm3\n\t" | 691 "pand %2, %%mm3 \n\t" |
693 "psrlq $5, %%mm1\n\t" | 692 "psrlq $5, %%mm1 \n\t" |
694 "psrlq $5, %%mm4\n\t" | 693 "psrlq $5, %%mm4 \n\t" |
695 "pand %%mm6, %%mm1\n\t" | 694 "pand %%mm6, %%mm1 \n\t" |
696 "pand %%mm6, %%mm4\n\t" | 695 "pand %%mm6, %%mm4 \n\t" |
697 "psrlq $8, %%mm2\n\t" | 696 "psrlq $8, %%mm2 \n\t" |
698 "psrlq $8, %%mm5\n\t" | 697 "psrlq $8, %%mm5 \n\t" |
699 "pand %%mm7, %%mm2\n\t" | 698 "pand %%mm7, %%mm2 \n\t" |
700 "pand %%mm7, %%mm5\n\t" | 699 "pand %%mm7, %%mm5 \n\t" |
701 "por %%mm1, %%mm0\n\t" | 700 "por %%mm1, %%mm0 \n\t" |
702 "por %%mm4, %%mm3\n\t" | 701 "por %%mm4, %%mm3 \n\t" |
703 "por %%mm2, %%mm0\n\t" | 702 "por %%mm2, %%mm0 \n\t" |
704 "por %%mm5, %%mm3\n\t" | 703 "por %%mm5, %%mm3 \n\t" |
705 "psllq $16, %%mm3\n\t" | 704 "psllq $16, %%mm3 \n\t" |
706 "por %%mm3, %%mm0\n\t" | 705 "por %%mm3, %%mm0 \n\t" |
707 MOVNTQ" %%mm0, %0\n\t" | 706 MOVNTQ" %%mm0, %0 \n\t" |
708 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 707 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
709 d += 4; | 708 d += 4; |
710 s += 12; | 709 s += 12; |
711 } | 710 } |
712 __asm __volatile(SFENCE:::"memory"); | 711 __asm __volatile(SFENCE:::"memory"); |
713 __asm __volatile(EMMS:::"memory"); | 712 __asm __volatile(EMMS:::"memory"); |
714 #endif | 713 #endif |
715 while(s < end) | 714 while (s < end) |
716 { | 715 { |
717 const int b= *s++; | 716 const int b = *s++; |
718 const int g= *s++; | 717 const int g = *s++; |
719 const int r= *s++; | 718 const int r = *s++; |
720 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 719 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
721 } | 720 } |
722 } | 721 } |
723 | 722 |
724 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) | 723 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) |
725 { | 724 { |
726 const uint8_t *s = src; | 725 const uint8_t *s = src; |
727 const uint8_t *end; | 726 const uint8_t *end; |
728 #ifdef HAVE_MMX | 727 #ifdef HAVE_MMX |
729 const uint8_t *mm_end; | 728 const uint8_t *mm_end; |
730 #endif | 729 #endif |
731 uint16_t *d = (uint16_t *)dst; | 730 uint16_t *d = (uint16_t *)dst; |
732 end = s + src_size; | 731 end = s + src_size; |
733 #ifdef HAVE_MMX | 732 #ifdef HAVE_MMX |
734 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 733 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
735 __asm __volatile( | 734 __asm __volatile( |
736 "movq %0, %%mm7\n\t" | 735 "movq %0, %%mm7 \n\t" |
737 "movq %1, %%mm6\n\t" | 736 "movq %1, %%mm6 \n\t" |
738 ::"m"(red_16mask),"m"(green_16mask)); | 737 ::"m"(red_16mask),"m"(green_16mask)); |
739 mm_end = end - 15; | 738 mm_end = end - 15; |
740 while(s < mm_end) | 739 while (s < mm_end) |
741 { | 740 { |
742 __asm __volatile( | 741 __asm __volatile( |
743 PREFETCH" 32%1\n\t" | 742 PREFETCH" 32%1 \n\t" |
744 "movd %1, %%mm0\n\t" | 743 "movd %1, %%mm0 \n\t" |
745 "movd 3%1, %%mm3\n\t" | 744 "movd 3%1, %%mm3 \n\t" |
746 "punpckldq 6%1, %%mm0\n\t" | 745 "punpckldq 6%1, %%mm0 \n\t" |
747 "punpckldq 9%1, %%mm3\n\t" | 746 "punpckldq 9%1, %%mm3 \n\t" |
748 "movq %%mm0, %%mm1\n\t" | 747 "movq %%mm0, %%mm1 \n\t" |
749 "movq %%mm0, %%mm2\n\t" | 748 "movq %%mm0, %%mm2 \n\t" |
750 "movq %%mm3, %%mm4\n\t" | 749 "movq %%mm3, %%mm4 \n\t" |
751 "movq %%mm3, %%mm5\n\t" | 750 "movq %%mm3, %%mm5 \n\t" |
752 "psllq $8, %%mm0\n\t" | 751 "psllq $8, %%mm0 \n\t" |
753 "psllq $8, %%mm3\n\t" | 752 "psllq $8, %%mm3 \n\t" |
754 "pand %%mm7, %%mm0\n\t" | 753 "pand %%mm7, %%mm0 \n\t" |
755 "pand %%mm7, %%mm3\n\t" | 754 "pand %%mm7, %%mm3 \n\t" |
756 "psrlq $5, %%mm1\n\t" | 755 "psrlq $5, %%mm1 \n\t" |
757 "psrlq $5, %%mm4\n\t" | 756 "psrlq $5, %%mm4 \n\t" |
758 "pand %%mm6, %%mm1\n\t" | 757 "pand %%mm6, %%mm1 \n\t" |
759 "pand %%mm6, %%mm4\n\t" | 758 "pand %%mm6, %%mm4 \n\t" |
760 "psrlq $19, %%mm2\n\t" | 759 "psrlq $19, %%mm2 \n\t" |
761 "psrlq $19, %%mm5\n\t" | 760 "psrlq $19, %%mm5 \n\t" |
762 "pand %2, %%mm2\n\t" | 761 "pand %2, %%mm2 \n\t" |
763 "pand %2, %%mm5\n\t" | 762 "pand %2, %%mm5 \n\t" |
764 "por %%mm1, %%mm0\n\t" | 763 "por %%mm1, %%mm0 \n\t" |
765 "por %%mm4, %%mm3\n\t" | 764 "por %%mm4, %%mm3 \n\t" |
766 "por %%mm2, %%mm0\n\t" | 765 "por %%mm2, %%mm0 \n\t" |
767 "por %%mm5, %%mm3\n\t" | 766 "por %%mm5, %%mm3 \n\t" |
768 "psllq $16, %%mm3\n\t" | 767 "psllq $16, %%mm3 \n\t" |
769 "por %%mm3, %%mm0\n\t" | 768 "por %%mm3, %%mm0 \n\t" |
770 MOVNTQ" %%mm0, %0\n\t" | 769 MOVNTQ" %%mm0, %0 \n\t" |
771 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 770 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
772 d += 4; | 771 d += 4; |
773 s += 12; | 772 s += 12; |
774 } | 773 } |
775 __asm __volatile(SFENCE:::"memory"); | 774 __asm __volatile(SFENCE:::"memory"); |
776 __asm __volatile(EMMS:::"memory"); | 775 __asm __volatile(EMMS:::"memory"); |
777 #endif | 776 #endif |
778 while(s < end) | 777 while (s < end) |
779 { | 778 { |
780 const int r= *s++; | 779 const int r = *s++; |
781 const int g= *s++; | 780 const int g = *s++; |
782 const int b= *s++; | 781 const int b = *s++; |
783 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 782 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
784 } | 783 } |
785 } | 784 } |
786 | 785 |
787 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) | 786 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) |
788 { | 787 { |
789 const uint8_t *s = src; | 788 const uint8_t *s = src; |
790 const uint8_t *end; | 789 const uint8_t *end; |
791 #ifdef HAVE_MMX | 790 #ifdef HAVE_MMX |
792 const uint8_t *mm_end; | 791 const uint8_t *mm_end; |
793 #endif | 792 #endif |
794 uint16_t *d = (uint16_t *)dst; | 793 uint16_t *d = (uint16_t *)dst; |
795 end = s + src_size; | 794 end = s + src_size; |
796 #ifdef HAVE_MMX | 795 #ifdef HAVE_MMX |
797 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 796 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
798 __asm __volatile( | 797 __asm __volatile( |
799 "movq %0, %%mm7\n\t" | 798 "movq %0, %%mm7 \n\t" |
800 "movq %1, %%mm6\n\t" | 799 "movq %1, %%mm6 \n\t" |
801 ::"m"(red_15mask),"m"(green_15mask)); | 800 ::"m"(red_15mask),"m"(green_15mask)); |
802 mm_end = end - 11; | 801 mm_end = end - 11; |
803 while(s < mm_end) | 802 while (s < mm_end) |
804 { | 803 { |
805 __asm __volatile( | 804 __asm __volatile( |
806 PREFETCH" 32%1\n\t" | 805 PREFETCH" 32%1 \n\t" |
807 "movd %1, %%mm0\n\t" | 806 "movd %1, %%mm0 \n\t" |
808 "movd 3%1, %%mm3\n\t" | 807 "movd 3%1, %%mm3 \n\t" |
809 "punpckldq 6%1, %%mm0\n\t" | 808 "punpckldq 6%1, %%mm0 \n\t" |
810 "punpckldq 9%1, %%mm3\n\t" | 809 "punpckldq 9%1, %%mm3 \n\t" |
811 "movq %%mm0, %%mm1\n\t" | 810 "movq %%mm0, %%mm1 \n\t" |
812 "movq %%mm0, %%mm2\n\t" | 811 "movq %%mm0, %%mm2 \n\t" |
813 "movq %%mm3, %%mm4\n\t" | 812 "movq %%mm3, %%mm4 \n\t" |
814 "movq %%mm3, %%mm5\n\t" | 813 "movq %%mm3, %%mm5 \n\t" |
815 "psrlq $3, %%mm0\n\t" | 814 "psrlq $3, %%mm0 \n\t" |
816 "psrlq $3, %%mm3\n\t" | 815 "psrlq $3, %%mm3 \n\t" |
817 "pand %2, %%mm0\n\t" | 816 "pand %2, %%mm0 \n\t" |
818 "pand %2, %%mm3\n\t" | 817 "pand %2, %%mm3 \n\t" |
819 "psrlq $6, %%mm1\n\t" | 818 "psrlq $6, %%mm1 \n\t" |
820 "psrlq $6, %%mm4\n\t" | 819 "psrlq $6, %%mm4 \n\t" |
821 "pand %%mm6, %%mm1\n\t" | 820 "pand %%mm6, %%mm1 \n\t" |
822 "pand %%mm6, %%mm4\n\t" | 821 "pand %%mm6, %%mm4 \n\t" |
823 "psrlq $9, %%mm2\n\t" | 822 "psrlq $9, %%mm2 \n\t" |
824 "psrlq $9, %%mm5\n\t" | 823 "psrlq $9, %%mm5 \n\t" |
825 "pand %%mm7, %%mm2\n\t" | 824 "pand %%mm7, %%mm2 \n\t" |
826 "pand %%mm7, %%mm5\n\t" | 825 "pand %%mm7, %%mm5 \n\t" |
827 "por %%mm1, %%mm0\n\t" | 826 "por %%mm1, %%mm0 \n\t" |
828 "por %%mm4, %%mm3\n\t" | 827 "por %%mm4, %%mm3 \n\t" |
829 "por %%mm2, %%mm0\n\t" | 828 "por %%mm2, %%mm0 \n\t" |
830 "por %%mm5, %%mm3\n\t" | 829 "por %%mm5, %%mm3 \n\t" |
831 "psllq $16, %%mm3\n\t" | 830 "psllq $16, %%mm3 \n\t" |
832 "por %%mm3, %%mm0\n\t" | 831 "por %%mm3, %%mm0 \n\t" |
833 MOVNTQ" %%mm0, %0\n\t" | 832 MOVNTQ" %%mm0, %0 \n\t" |
834 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 833 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
835 d += 4; | 834 d += 4; |
836 s += 12; | 835 s += 12; |
837 } | 836 } |
838 __asm __volatile(SFENCE:::"memory"); | 837 __asm __volatile(SFENCE:::"memory"); |
839 __asm __volatile(EMMS:::"memory"); | 838 __asm __volatile(EMMS:::"memory"); |
840 #endif | 839 #endif |
841 while(s < end) | 840 while (s < end) |
842 { | 841 { |
843 const int b= *s++; | 842 const int b = *s++; |
844 const int g= *s++; | 843 const int g = *s++; |
845 const int r= *s++; | 844 const int r = *s++; |
846 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 845 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
847 } | 846 } |
848 } | 847 } |
849 | 848 |
850 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) | 849 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) |
851 { | 850 { |
852 const uint8_t *s = src; | 851 const uint8_t *s = src; |
853 const uint8_t *end; | 852 const uint8_t *end; |
854 #ifdef HAVE_MMX | 853 #ifdef HAVE_MMX |
855 const uint8_t *mm_end; | 854 const uint8_t *mm_end; |
856 #endif | 855 #endif |
857 uint16_t *d = (uint16_t *)dst; | 856 uint16_t *d = (uint16_t *)dst; |
858 end = s + src_size; | 857 end = s + src_size; |
859 #ifdef HAVE_MMX | 858 #ifdef HAVE_MMX |
860 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 859 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
861 __asm __volatile( | 860 __asm __volatile( |
862 "movq %0, %%mm7\n\t" | 861 "movq %0, %%mm7 \n\t" |
863 "movq %1, %%mm6\n\t" | 862 "movq %1, %%mm6 \n\t" |
864 ::"m"(red_15mask),"m"(green_15mask)); | 863 ::"m"(red_15mask),"m"(green_15mask)); |
865 mm_end = end - 15; | 864 mm_end = end - 15; |
866 while(s < mm_end) | 865 while (s < mm_end) |
867 { | 866 { |
868 __asm __volatile( | 867 __asm __volatile( |
869 PREFETCH" 32%1\n\t" | 868 PREFETCH" 32%1 \n\t" |
870 "movd %1, %%mm0\n\t" | 869 "movd %1, %%mm0 \n\t" |
871 "movd 3%1, %%mm3\n\t" | 870 "movd 3%1, %%mm3 \n\t" |
872 "punpckldq 6%1, %%mm0\n\t" | 871 "punpckldq 6%1, %%mm0 \n\t" |
873 "punpckldq 9%1, %%mm3\n\t" | 872 "punpckldq 9%1, %%mm3 \n\t" |
874 "movq %%mm0, %%mm1\n\t" | 873 "movq %%mm0, %%mm1 \n\t" |
875 "movq %%mm0, %%mm2\n\t" | 874 "movq %%mm0, %%mm2 \n\t" |
876 "movq %%mm3, %%mm4\n\t" | 875 "movq %%mm3, %%mm4 \n\t" |
877 "movq %%mm3, %%mm5\n\t" | 876 "movq %%mm3, %%mm5 \n\t" |
878 "psllq $7, %%mm0\n\t" | 877 "psllq $7, %%mm0 \n\t" |
879 "psllq $7, %%mm3\n\t" | 878 "psllq $7, %%mm3 \n\t" |
880 "pand %%mm7, %%mm0\n\t" | 879 "pand %%mm7, %%mm0 \n\t" |
881 "pand %%mm7, %%mm3\n\t" | 880 "pand %%mm7, %%mm3 \n\t" |
882 "psrlq $6, %%mm1\n\t" | 881 "psrlq $6, %%mm1 \n\t" |
883 "psrlq $6, %%mm4\n\t" | 882 "psrlq $6, %%mm4 \n\t" |
884 "pand %%mm6, %%mm1\n\t" | 883 "pand %%mm6, %%mm1 \n\t" |
885 "pand %%mm6, %%mm4\n\t" | 884 "pand %%mm6, %%mm4 \n\t" |
886 "psrlq $19, %%mm2\n\t" | 885 "psrlq $19, %%mm2 \n\t" |
887 "psrlq $19, %%mm5\n\t" | 886 "psrlq $19, %%mm5 \n\t" |
888 "pand %2, %%mm2\n\t" | 887 "pand %2, %%mm2 \n\t" |
889 "pand %2, %%mm5\n\t" | 888 "pand %2, %%mm5 \n\t" |
890 "por %%mm1, %%mm0\n\t" | 889 "por %%mm1, %%mm0 \n\t" |
891 "por %%mm4, %%mm3\n\t" | 890 "por %%mm4, %%mm3 \n\t" |
892 "por %%mm2, %%mm0\n\t" | 891 "por %%mm2, %%mm0 \n\t" |
893 "por %%mm5, %%mm3\n\t" | 892 "por %%mm5, %%mm3 \n\t" |
894 "psllq $16, %%mm3\n\t" | 893 "psllq $16, %%mm3 \n\t" |
895 "por %%mm3, %%mm0\n\t" | 894 "por %%mm3, %%mm0 \n\t" |
896 MOVNTQ" %%mm0, %0\n\t" | 895 MOVNTQ" %%mm0, %0 \n\t" |
897 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 896 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
898 d += 4; | 897 d += 4; |
899 s += 12; | 898 s += 12; |
900 } | 899 } |
901 __asm __volatile(SFENCE:::"memory"); | 900 __asm __volatile(SFENCE:::"memory"); |
902 __asm __volatile(EMMS:::"memory"); | 901 __asm __volatile(EMMS:::"memory"); |
903 #endif | 902 #endif |
904 while(s < end) | 903 while (s < end) |
905 { | 904 { |
906 const int r= *s++; | 905 const int r = *s++; |
907 const int g= *s++; | 906 const int g = *s++; |
908 const int b= *s++; | 907 const int b = *s++; |
909 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 908 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
910 } | 909 } |
911 } | 910 } |
912 | 911 |
913 /* | 912 /* |
914 I use here less accurate approximation by simply | 913 I use here less accurate approximation by simply |
915 left-shifting the input | 914 left-shifting the input |
933 | | 932 | |
934 Original Bits | 933 Original Bits |
935 */ | 934 */ |
936 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size) | 935 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size) |
937 { | 936 { |
938 const uint16_t *end; | 937 const uint16_t *end; |
939 #ifdef HAVE_MMX | 938 #ifdef HAVE_MMX |
940 const uint16_t *mm_end; | 939 const uint16_t *mm_end; |
941 #endif | 940 #endif |
942 uint8_t *d = (uint8_t *)dst; | 941 uint8_t *d = (uint8_t *)dst; |
943 const uint16_t *s = (uint16_t *)src; | 942 const uint16_t *s = (uint16_t *)src; |
944 end = s + src_size/2; | 943 end = s + src_size/2; |
945 #ifdef HAVE_MMX | 944 #ifdef HAVE_MMX |
946 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 945 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
947 mm_end = end - 7; | 946 mm_end = end - 7; |
948 while(s < mm_end) | 947 while (s < mm_end) |
949 { | 948 { |
950 __asm __volatile( | 949 __asm __volatile( |
951 PREFETCH" 32%1\n\t" | 950 PREFETCH" 32%1 \n\t" |
952 "movq %1, %%mm0\n\t" | 951 "movq %1, %%mm0 \n\t" |
953 "movq %1, %%mm1\n\t" | 952 "movq %1, %%mm1 \n\t" |
954 "movq %1, %%mm2\n\t" | 953 "movq %1, %%mm2 \n\t" |
955 "pand %2, %%mm0\n\t" | 954 "pand %2, %%mm0 \n\t" |
956 "pand %3, %%mm1\n\t" | 955 "pand %3, %%mm1 \n\t" |
957 "pand %4, %%mm2\n\t" | 956 "pand %4, %%mm2 \n\t" |
958 "psllq $3, %%mm0\n\t" | 957 "psllq $3, %%mm0 \n\t" |
959 "psrlq $2, %%mm1\n\t" | 958 "psrlq $2, %%mm1 \n\t" |
960 "psrlq $7, %%mm2\n\t" | 959 "psrlq $7, %%mm2 \n\t" |
961 "movq %%mm0, %%mm3\n\t" | 960 "movq %%mm0, %%mm3 \n\t" |
962 "movq %%mm1, %%mm4\n\t" | 961 "movq %%mm1, %%mm4 \n\t" |
963 "movq %%mm2, %%mm5\n\t" | 962 "movq %%mm2, %%mm5 \n\t" |
964 "punpcklwd %5, %%mm0\n\t" | 963 "punpcklwd %5, %%mm0 \n\t" |
965 "punpcklwd %5, %%mm1\n\t" | 964 "punpcklwd %5, %%mm1 \n\t" |
966 "punpcklwd %5, %%mm2\n\t" | 965 "punpcklwd %5, %%mm2 \n\t" |
967 "punpckhwd %5, %%mm3\n\t" | 966 "punpckhwd %5, %%mm3 \n\t" |
968 "punpckhwd %5, %%mm4\n\t" | 967 "punpckhwd %5, %%mm4 \n\t" |
969 "punpckhwd %5, %%mm5\n\t" | 968 "punpckhwd %5, %%mm5 \n\t" |
970 "psllq $8, %%mm1\n\t" | 969 "psllq $8, %%mm1 \n\t" |
971 "psllq $16, %%mm2\n\t" | 970 "psllq $16, %%mm2 \n\t" |
972 "por %%mm1, %%mm0\n\t" | 971 "por %%mm1, %%mm0 \n\t" |
973 "por %%mm2, %%mm0\n\t" | 972 "por %%mm2, %%mm0 \n\t" |
974 "psllq $8, %%mm4\n\t" | 973 "psllq $8, %%mm4 \n\t" |
975 "psllq $16, %%mm5\n\t" | 974 "psllq $16, %%mm5 \n\t" |
976 "por %%mm4, %%mm3\n\t" | 975 "por %%mm4, %%mm3 \n\t" |
977 "por %%mm5, %%mm3\n\t" | 976 "por %%mm5, %%mm3 \n\t" |
978 | 977 |
979 "movq %%mm0, %%mm6\n\t" | 978 "movq %%mm0, %%mm6 \n\t" |
980 "movq %%mm3, %%mm7\n\t" | 979 "movq %%mm3, %%mm7 \n\t" |
981 | 980 |
982 "movq 8%1, %%mm0\n\t" | 981 "movq 8%1, %%mm0 \n\t" |
983 "movq 8%1, %%mm1\n\t" | 982 "movq 8%1, %%mm1 \n\t" |
984 "movq 8%1, %%mm2\n\t" | 983 "movq 8%1, %%mm2 \n\t" |
985 "pand %2, %%mm0\n\t" | 984 "pand %2, %%mm0 \n\t" |
986 "pand %3, %%mm1\n\t" | 985 "pand %3, %%mm1 \n\t" |
987 "pand %4, %%mm2\n\t" | 986 "pand %4, %%mm2 \n\t" |
988 "psllq $3, %%mm0\n\t" | 987 "psllq $3, %%mm0 \n\t" |
989 "psrlq $2, %%mm1\n\t" | 988 "psrlq $2, %%mm1 \n\t" |
990 "psrlq $7, %%mm2\n\t" | 989 "psrlq $7, %%mm2 \n\t" |
991 "movq %%mm0, %%mm3\n\t" | 990 "movq %%mm0, %%mm3 \n\t" |
992 "movq %%mm1, %%mm4\n\t" | 991 "movq %%mm1, %%mm4 \n\t" |
993 "movq %%mm2, %%mm5\n\t" | 992 "movq %%mm2, %%mm5 \n\t" |
994 "punpcklwd %5, %%mm0\n\t" | 993 "punpcklwd %5, %%mm0 \n\t" |
995 "punpcklwd %5, %%mm1\n\t" | 994 "punpcklwd %5, %%mm1 \n\t" |
996 "punpcklwd %5, %%mm2\n\t" | 995 "punpcklwd %5, %%mm2 \n\t" |
997 "punpckhwd %5, %%mm3\n\t" | 996 "punpckhwd %5, %%mm3 \n\t" |
998 "punpckhwd %5, %%mm4\n\t" | 997 "punpckhwd %5, %%mm4 \n\t" |
999 "punpckhwd %5, %%mm5\n\t" | 998 "punpckhwd %5, %%mm5 \n\t" |
1000 "psllq $8, %%mm1\n\t" | 999 "psllq $8, %%mm1 \n\t" |
1001 "psllq $16, %%mm2\n\t" | 1000 "psllq $16, %%mm2 \n\t" |
1002 "por %%mm1, %%mm0\n\t" | 1001 "por %%mm1, %%mm0 \n\t" |
1003 "por %%mm2, %%mm0\n\t" | 1002 "por %%mm2, %%mm0 \n\t" |
1004 "psllq $8, %%mm4\n\t" | 1003 "psllq $8, %%mm4 \n\t" |
1005 "psllq $16, %%mm5\n\t" | 1004 "psllq $16, %%mm5 \n\t" |
1006 "por %%mm4, %%mm3\n\t" | 1005 "por %%mm4, %%mm3 \n\t" |
1007 "por %%mm5, %%mm3\n\t" | 1006 "por %%mm5, %%mm3 \n\t" |
1008 | 1007 |
1009 :"=m"(*d) | 1008 :"=m"(*d) |
1010 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | 1009 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) |
1011 :"memory"); | 1010 :"memory"); |
1012 /* Borrowed 32 to 24 */ | 1011 /* Borrowed 32 to 24 */ |
1013 __asm __volatile( | 1012 __asm __volatile( |
1014 "movq %%mm0, %%mm4\n\t" | 1013 "movq %%mm0, %%mm4 \n\t" |
1015 "movq %%mm3, %%mm5\n\t" | 1014 "movq %%mm3, %%mm5 \n\t" |
1016 "movq %%mm6, %%mm0\n\t" | 1015 "movq %%mm6, %%mm0 \n\t" |
1017 "movq %%mm7, %%mm1\n\t" | 1016 "movq %%mm7, %%mm1 \n\t" |
1018 | 1017 |
1019 "movq %%mm4, %%mm6\n\t" | 1018 "movq %%mm4, %%mm6 \n\t" |
1020 "movq %%mm5, %%mm7\n\t" | 1019 "movq %%mm5, %%mm7 \n\t" |
1021 "movq %%mm0, %%mm2\n\t" | 1020 "movq %%mm0, %%mm2 \n\t" |
1022 "movq %%mm1, %%mm3\n\t" | 1021 "movq %%mm1, %%mm3 \n\t" |
1023 | 1022 |
1024 "psrlq $8, %%mm2\n\t" | 1023 "psrlq $8, %%mm2 \n\t" |
1025 "psrlq $8, %%mm3\n\t" | 1024 "psrlq $8, %%mm3 \n\t" |
1026 "psrlq $8, %%mm6\n\t" | 1025 "psrlq $8, %%mm6 \n\t" |
1027 "psrlq $8, %%mm7\n\t" | 1026 "psrlq $8, %%mm7 \n\t" |
1028 "pand %2, %%mm0\n\t" | 1027 "pand %2, %%mm0 \n\t" |
1029 "pand %2, %%mm1\n\t" | 1028 "pand %2, %%mm1 \n\t" |
1030 "pand %2, %%mm4\n\t" | 1029 "pand %2, %%mm4 \n\t" |
1031 "pand %2, %%mm5\n\t" | 1030 "pand %2, %%mm5 \n\t" |
1032 "pand %3, %%mm2\n\t" | 1031 "pand %3, %%mm2 \n\t" |
1033 "pand %3, %%mm3\n\t" | 1032 "pand %3, %%mm3 \n\t" |
1034 "pand %3, %%mm6\n\t" | 1033 "pand %3, %%mm6 \n\t" |
1035 "pand %3, %%mm7\n\t" | 1034 "pand %3, %%mm7 \n\t" |
1036 "por %%mm2, %%mm0\n\t" | 1035 "por %%mm2, %%mm0 \n\t" |
1037 "por %%mm3, %%mm1\n\t" | 1036 "por %%mm3, %%mm1 \n\t" |
1038 "por %%mm6, %%mm4\n\t" | 1037 "por %%mm6, %%mm4 \n\t" |
1039 "por %%mm7, %%mm5\n\t" | 1038 "por %%mm7, %%mm5 \n\t" |
1040 | 1039 |
1041 "movq %%mm1, %%mm2\n\t" | 1040 "movq %%mm1, %%mm2 \n\t" |
1042 "movq %%mm4, %%mm3\n\t" | 1041 "movq %%mm4, %%mm3 \n\t" |
1043 "psllq $48, %%mm2\n\t" | 1042 "psllq $48, %%mm2 \n\t" |
1044 "psllq $32, %%mm3\n\t" | 1043 "psllq $32, %%mm3 \n\t" |
1045 "pand %4, %%mm2\n\t" | 1044 "pand %4, %%mm2 \n\t" |
1046 "pand %5, %%mm3\n\t" | 1045 "pand %5, %%mm3 \n\t" |
1047 "por %%mm2, %%mm0\n\t" | 1046 "por %%mm2, %%mm0 \n\t" |
1048 "psrlq $16, %%mm1\n\t" | 1047 "psrlq $16, %%mm1 \n\t" |
1049 "psrlq $32, %%mm4\n\t" | 1048 "psrlq $32, %%mm4 \n\t" |
1050 "psllq $16, %%mm5\n\t" | 1049 "psllq $16, %%mm5 \n\t" |
1051 "por %%mm3, %%mm1\n\t" | 1050 "por %%mm3, %%mm1 \n\t" |
1052 "pand %6, %%mm5\n\t" | 1051 "pand %6, %%mm5 \n\t" |
1053 "por %%mm5, %%mm4\n\t" | 1052 "por %%mm5, %%mm4 \n\t" |
1054 | 1053 |
1055 MOVNTQ" %%mm0, %0\n\t" | 1054 MOVNTQ" %%mm0, %0 \n\t" |
1056 MOVNTQ" %%mm1, 8%0\n\t" | 1055 MOVNTQ" %%mm1, 8%0 \n\t" |
1057 MOVNTQ" %%mm4, 16%0" | 1056 MOVNTQ" %%mm4, 16%0" |
1058 | 1057 |
1059 :"=m"(*d) | 1058 :"=m"(*d) |
1060 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | 1059 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) |
1061 :"memory"); | 1060 :"memory"); |
1062 d += 24; | 1061 d += 24; |
1063 s += 8; | 1062 s += 8; |
1064 } | 1063 } |
1065 __asm __volatile(SFENCE:::"memory"); | 1064 __asm __volatile(SFENCE:::"memory"); |
1066 __asm __volatile(EMMS:::"memory"); | 1065 __asm __volatile(EMMS:::"memory"); |
1067 #endif | 1066 #endif |
1068 while(s < end) | 1067 while (s < end) |
1069 { | 1068 { |
1070 register uint16_t bgr; | 1069 register uint16_t bgr; |
1071 bgr = *s++; | 1070 bgr = *s++; |
1072 *d++ = (bgr&0x1F)<<3; | 1071 *d++ = (bgr&0x1F)<<3; |
1073 *d++ = (bgr&0x3E0)>>2; | 1072 *d++ = (bgr&0x3E0)>>2; |
1074 *d++ = (bgr&0x7C00)>>7; | 1073 *d++ = (bgr&0x7C00)>>7; |
1075 } | 1074 } |
1076 } | 1075 } |
1077 | 1076 |
1078 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size) | 1077 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size) |
1079 { | 1078 { |
1080 const uint16_t *end; | 1079 const uint16_t *end; |
1081 #ifdef HAVE_MMX | 1080 #ifdef HAVE_MMX |
1082 const uint16_t *mm_end; | 1081 const uint16_t *mm_end; |
1083 #endif | 1082 #endif |
1084 uint8_t *d = (uint8_t *)dst; | 1083 uint8_t *d = (uint8_t *)dst; |
1085 const uint16_t *s = (const uint16_t *)src; | 1084 const uint16_t *s = (const uint16_t *)src; |
1086 end = s + src_size/2; | 1085 end = s + src_size/2; |
1087 #ifdef HAVE_MMX | 1086 #ifdef HAVE_MMX |
1088 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 1087 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1089 mm_end = end - 7; | 1088 mm_end = end - 7; |
1090 while(s < mm_end) | 1089 while (s < mm_end) |
1091 { | 1090 { |
1092 __asm __volatile( | 1091 __asm __volatile( |
1093 PREFETCH" 32%1\n\t" | 1092 PREFETCH" 32%1 \n\t" |
1094 "movq %1, %%mm0\n\t" | 1093 "movq %1, %%mm0 \n\t" |
1095 "movq %1, %%mm1\n\t" | 1094 "movq %1, %%mm1 \n\t" |
1096 "movq %1, %%mm2\n\t" | 1095 "movq %1, %%mm2 \n\t" |
1097 "pand %2, %%mm0\n\t" | 1096 "pand %2, %%mm0 \n\t" |
1098 "pand %3, %%mm1\n\t" | 1097 "pand %3, %%mm1 \n\t" |
1099 "pand %4, %%mm2\n\t" | 1098 "pand %4, %%mm2 \n\t" |
1100 "psllq $3, %%mm0\n\t" | 1099 "psllq $3, %%mm0 \n\t" |
1101 "psrlq $3, %%mm1\n\t" | 1100 "psrlq $3, %%mm1 \n\t" |
1102 "psrlq $8, %%mm2\n\t" | 1101 "psrlq $8, %%mm2 \n\t" |
1103 "movq %%mm0, %%mm3\n\t" | 1102 "movq %%mm0, %%mm3 \n\t" |
1104 "movq %%mm1, %%mm4\n\t" | 1103 "movq %%mm1, %%mm4 \n\t" |
1105 "movq %%mm2, %%mm5\n\t" | 1104 "movq %%mm2, %%mm5 \n\t" |
1106 "punpcklwd %5, %%mm0\n\t" | 1105 "punpcklwd %5, %%mm0 \n\t" |
1107 "punpcklwd %5, %%mm1\n\t" | 1106 "punpcklwd %5, %%mm1 \n\t" |
1108 "punpcklwd %5, %%mm2\n\t" | 1107 "punpcklwd %5, %%mm2 \n\t" |
1109 "punpckhwd %5, %%mm3\n\t" | 1108 "punpckhwd %5, %%mm3 \n\t" |
1110 "punpckhwd %5, %%mm4\n\t" | 1109 "punpckhwd %5, %%mm4 \n\t" |
1111 "punpckhwd %5, %%mm5\n\t" | 1110 "punpckhwd %5, %%mm5 \n\t" |
1112 "psllq $8, %%mm1\n\t" | 1111 "psllq $8, %%mm1 \n\t" |
1113 "psllq $16, %%mm2\n\t" | 1112 "psllq $16, %%mm2 \n\t" |
1114 "por %%mm1, %%mm0\n\t" | 1113 "por %%mm1, %%mm0 \n\t" |
1115 "por %%mm2, %%mm0\n\t" | 1114 "por %%mm2, %%mm0 \n\t" |
1116 "psllq $8, %%mm4\n\t" | 1115 "psllq $8, %%mm4 \n\t" |
1117 "psllq $16, %%mm5\n\t" | 1116 "psllq $16, %%mm5 \n\t" |
1118 "por %%mm4, %%mm3\n\t" | 1117 "por %%mm4, %%mm3 \n\t" |
1119 "por %%mm5, %%mm3\n\t" | 1118 "por %%mm5, %%mm3 \n\t" |
1120 | 1119 |
1121 "movq %%mm0, %%mm6\n\t" | 1120 "movq %%mm0, %%mm6 \n\t" |
1122 "movq %%mm3, %%mm7\n\t" | 1121 "movq %%mm3, %%mm7 \n\t" |
1123 | 1122 |
1124 "movq 8%1, %%mm0\n\t" | 1123 "movq 8%1, %%mm0 \n\t" |
1125 "movq 8%1, %%mm1\n\t" | 1124 "movq 8%1, %%mm1 \n\t" |
1126 "movq 8%1, %%mm2\n\t" | 1125 "movq 8%1, %%mm2 \n\t" |
1127 "pand %2, %%mm0\n\t" | 1126 "pand %2, %%mm0 \n\t" |
1128 "pand %3, %%mm1\n\t" | 1127 "pand %3, %%mm1 \n\t" |
1129 "pand %4, %%mm2\n\t" | 1128 "pand %4, %%mm2 \n\t" |
1130 "psllq $3, %%mm0\n\t" | 1129 "psllq $3, %%mm0 \n\t" |
1131 "psrlq $3, %%mm1\n\t" | 1130 "psrlq $3, %%mm1 \n\t" |
1132 "psrlq $8, %%mm2\n\t" | 1131 "psrlq $8, %%mm2 \n\t" |
1133 "movq %%mm0, %%mm3\n\t" | 1132 "movq %%mm0, %%mm3 \n\t" |
1134 "movq %%mm1, %%mm4\n\t" | 1133 "movq %%mm1, %%mm4 \n\t" |
1135 "movq %%mm2, %%mm5\n\t" | 1134 "movq %%mm2, %%mm5 \n\t" |
1136 "punpcklwd %5, %%mm0\n\t" | 1135 "punpcklwd %5, %%mm0 \n\t" |
1137 "punpcklwd %5, %%mm1\n\t" | 1136 "punpcklwd %5, %%mm1 \n\t" |
1138 "punpcklwd %5, %%mm2\n\t" | 1137 "punpcklwd %5, %%mm2 \n\t" |
1139 "punpckhwd %5, %%mm3\n\t" | 1138 "punpckhwd %5, %%mm3 \n\t" |
1140 "punpckhwd %5, %%mm4\n\t" | 1139 "punpckhwd %5, %%mm4 \n\t" |
1141 "punpckhwd %5, %%mm5\n\t" | 1140 "punpckhwd %5, %%mm5 \n\t" |
1142 "psllq $8, %%mm1\n\t" | 1141 "psllq $8, %%mm1 \n\t" |
1143 "psllq $16, %%mm2\n\t" | 1142 "psllq $16, %%mm2 \n\t" |
1144 "por %%mm1, %%mm0\n\t" | 1143 "por %%mm1, %%mm0 \n\t" |
1145 "por %%mm2, %%mm0\n\t" | 1144 "por %%mm2, %%mm0 \n\t" |
1146 "psllq $8, %%mm4\n\t" | 1145 "psllq $8, %%mm4 \n\t" |
1147 "psllq $16, %%mm5\n\t" | 1146 "psllq $16, %%mm5 \n\t" |
1148 "por %%mm4, %%mm3\n\t" | 1147 "por %%mm4, %%mm3 \n\t" |
1149 "por %%mm5, %%mm3\n\t" | 1148 "por %%mm5, %%mm3 \n\t" |
1150 :"=m"(*d) | 1149 :"=m"(*d) |
1151 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | 1150 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) |
1152 :"memory"); | 1151 :"memory"); |
1153 /* Borrowed 32 to 24 */ | 1152 /* Borrowed 32 to 24 */ |
1154 __asm __volatile( | 1153 __asm __volatile( |
1155 "movq %%mm0, %%mm4\n\t" | 1154 "movq %%mm0, %%mm4 \n\t" |
1156 "movq %%mm3, %%mm5\n\t" | 1155 "movq %%mm3, %%mm5 \n\t" |
1157 "movq %%mm6, %%mm0\n\t" | 1156 "movq %%mm6, %%mm0 \n\t" |
1158 "movq %%mm7, %%mm1\n\t" | 1157 "movq %%mm7, %%mm1 \n\t" |
1159 | 1158 |
1160 "movq %%mm4, %%mm6\n\t" | 1159 "movq %%mm4, %%mm6 \n\t" |
1161 "movq %%mm5, %%mm7\n\t" | 1160 "movq %%mm5, %%mm7 \n\t" |
1162 "movq %%mm0, %%mm2\n\t" | 1161 "movq %%mm0, %%mm2 \n\t" |
1163 "movq %%mm1, %%mm3\n\t" | 1162 "movq %%mm1, %%mm3 \n\t" |
1164 | 1163 |
1165 "psrlq $8, %%mm2\n\t" | 1164 "psrlq $8, %%mm2 \n\t" |
1166 "psrlq $8, %%mm3\n\t" | 1165 "psrlq $8, %%mm3 \n\t" |
1167 "psrlq $8, %%mm6\n\t" | 1166 "psrlq $8, %%mm6 \n\t" |
1168 "psrlq $8, %%mm7\n\t" | 1167 "psrlq $8, %%mm7 \n\t" |
1169 "pand %2, %%mm0\n\t" | 1168 "pand %2, %%mm0 \n\t" |
1170 "pand %2, %%mm1\n\t" | 1169 "pand %2, %%mm1 \n\t" |
1171 "pand %2, %%mm4\n\t" | 1170 "pand %2, %%mm4 \n\t" |
1172 "pand %2, %%mm5\n\t" | 1171 "pand %2, %%mm5 \n\t" |
1173 "pand %3, %%mm2\n\t" | 1172 "pand %3, %%mm2 \n\t" |
1174 "pand %3, %%mm3\n\t" | 1173 "pand %3, %%mm3 \n\t" |
1175 "pand %3, %%mm6\n\t" | 1174 "pand %3, %%mm6 \n\t" |
1176 "pand %3, %%mm7\n\t" | 1175 "pand %3, %%mm7 \n\t" |
1177 "por %%mm2, %%mm0\n\t" | 1176 "por %%mm2, %%mm0 \n\t" |
1178 "por %%mm3, %%mm1\n\t" | 1177 "por %%mm3, %%mm1 \n\t" |
1179 "por %%mm6, %%mm4\n\t" | 1178 "por %%mm6, %%mm4 \n\t" |
1180 "por %%mm7, %%mm5\n\t" | 1179 "por %%mm7, %%mm5 \n\t" |
1181 | 1180 |
1182 "movq %%mm1, %%mm2\n\t" | 1181 "movq %%mm1, %%mm2 \n\t" |
1183 "movq %%mm4, %%mm3\n\t" | 1182 "movq %%mm4, %%mm3 \n\t" |
1184 "psllq $48, %%mm2\n\t" | 1183 "psllq $48, %%mm2 \n\t" |
1185 "psllq $32, %%mm3\n\t" | 1184 "psllq $32, %%mm3 \n\t" |
1186 "pand %4, %%mm2\n\t" | 1185 "pand %4, %%mm2 \n\t" |
1187 "pand %5, %%mm3\n\t" | 1186 "pand %5, %%mm3 \n\t" |
1188 "por %%mm2, %%mm0\n\t" | 1187 "por %%mm2, %%mm0 \n\t" |
1189 "psrlq $16, %%mm1\n\t" | 1188 "psrlq $16, %%mm1 \n\t" |
1190 "psrlq $32, %%mm4\n\t" | 1189 "psrlq $32, %%mm4 \n\t" |
1191 "psllq $16, %%mm5\n\t" | 1190 "psllq $16, %%mm5 \n\t" |
1192 "por %%mm3, %%mm1\n\t" | 1191 "por %%mm3, %%mm1 \n\t" |
1193 "pand %6, %%mm5\n\t" | 1192 "pand %6, %%mm5 \n\t" |
1194 "por %%mm5, %%mm4\n\t" | 1193 "por %%mm5, %%mm4 \n\t" |
1195 | 1194 |
1196 MOVNTQ" %%mm0, %0\n\t" | 1195 MOVNTQ" %%mm0, %0 \n\t" |
1197 MOVNTQ" %%mm1, 8%0\n\t" | 1196 MOVNTQ" %%mm1, 8%0 \n\t" |
1198 MOVNTQ" %%mm4, 16%0" | 1197 MOVNTQ" %%mm4, 16%0" |
1199 | 1198 |
1200 :"=m"(*d) | 1199 :"=m"(*d) |
1201 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | 1200 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) |
1202 :"memory"); | 1201 :"memory"); |
1203 d += 24; | 1202 d += 24; |
1204 s += 8; | 1203 s += 8; |
1205 } | 1204 } |
1206 __asm __volatile(SFENCE:::"memory"); | 1205 __asm __volatile(SFENCE:::"memory"); |
1207 __asm __volatile(EMMS:::"memory"); | 1206 __asm __volatile(EMMS:::"memory"); |
1208 #endif | 1207 #endif |
1209 while(s < end) | 1208 while (s < end) |
1210 { | 1209 { |
1211 register uint16_t bgr; | 1210 register uint16_t bgr; |
1212 bgr = *s++; | 1211 bgr = *s++; |
1213 *d++ = (bgr&0x1F)<<3; | 1212 *d++ = (bgr&0x1F)<<3; |
1214 *d++ = (bgr&0x7E0)>>3; | 1213 *d++ = (bgr&0x7E0)>>3; |
1215 *d++ = (bgr&0xF800)>>8; | 1214 *d++ = (bgr&0xF800)>>8; |
1216 } | 1215 } |
1217 } | 1216 } |
1218 | 1217 |
1219 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) | 1218 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) |
1220 { | 1219 { |
1221 const uint16_t *end; | 1220 const uint16_t *end; |
1222 #ifdef HAVE_MMX | 1221 #ifdef HAVE_MMX |
1223 const uint16_t *mm_end; | 1222 const uint16_t *mm_end; |
1224 #endif | 1223 #endif |
1225 uint8_t *d = (uint8_t *)dst; | 1224 uint8_t *d = (uint8_t *)dst; |
1226 const uint16_t *s = (const uint16_t *)src; | 1225 const uint16_t *s = (const uint16_t *)src; |
1227 end = s + src_size/2; | 1226 end = s + src_size/2; |
1228 #ifdef HAVE_MMX | 1227 #ifdef HAVE_MMX |
1229 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 1228 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1230 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | 1229 __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); |
1231 mm_end = end - 3; | 1230 mm_end = end - 3; |
1232 while(s < mm_end) | 1231 while (s < mm_end) |
1233 { | 1232 { |
1234 __asm __volatile( | 1233 __asm __volatile( |
1235 PREFETCH" 32%1\n\t" | 1234 PREFETCH" 32%1 \n\t" |
1236 "movq %1, %%mm0\n\t" | 1235 "movq %1, %%mm0 \n\t" |
1237 "movq %1, %%mm1\n\t" | 1236 "movq %1, %%mm1 \n\t" |
1238 "movq %1, %%mm2\n\t" | 1237 "movq %1, %%mm2 \n\t" |
1239 "pand %2, %%mm0\n\t" | 1238 "pand %2, %%mm0 \n\t" |
1240 "pand %3, %%mm1\n\t" | 1239 "pand %3, %%mm1 \n\t" |
1241 "pand %4, %%mm2\n\t" | 1240 "pand %4, %%mm2 \n\t" |
1242 "psllq $3, %%mm0\n\t" | 1241 "psllq $3, %%mm0 \n\t" |
1243 "psrlq $2, %%mm1\n\t" | 1242 "psrlq $2, %%mm1 \n\t" |
1244 "psrlq $7, %%mm2\n\t" | 1243 "psrlq $7, %%mm2 \n\t" |
1245 "movq %%mm0, %%mm3\n\t" | 1244 "movq %%mm0, %%mm3 \n\t" |
1246 "movq %%mm1, %%mm4\n\t" | 1245 "movq %%mm1, %%mm4 \n\t" |
1247 "movq %%mm2, %%mm5\n\t" | 1246 "movq %%mm2, %%mm5 \n\t" |
1248 "punpcklwd %%mm7, %%mm0\n\t" | 1247 "punpcklwd %%mm7, %%mm0 \n\t" |
1249 "punpcklwd %%mm7, %%mm1\n\t" | 1248 "punpcklwd %%mm7, %%mm1 \n\t" |
1250 "punpcklwd %%mm7, %%mm2\n\t" | 1249 "punpcklwd %%mm7, %%mm2 \n\t" |
1251 "punpckhwd %%mm7, %%mm3\n\t" | 1250 "punpckhwd %%mm7, %%mm3 \n\t" |
1252 "punpckhwd %%mm7, %%mm4\n\t" | 1251 "punpckhwd %%mm7, %%mm4 \n\t" |
1253 "punpckhwd %%mm7, %%mm5\n\t" | 1252 "punpckhwd %%mm7, %%mm5 \n\t" |
1254 "psllq $8, %%mm1\n\t" | 1253 "psllq $8, %%mm1 \n\t" |
1255 "psllq $16, %%mm2\n\t" | 1254 "psllq $16, %%mm2 \n\t" |
1256 "por %%mm1, %%mm0\n\t" | 1255 "por %%mm1, %%mm0 \n\t" |
1257 "por %%mm2, %%mm0\n\t" | 1256 "por %%mm2, %%mm0 \n\t" |
1258 "psllq $8, %%mm4\n\t" | 1257 "psllq $8, %%mm4 \n\t" |
1259 "psllq $16, %%mm5\n\t" | 1258 "psllq $16, %%mm5 \n\t" |
1260 "por %%mm4, %%mm3\n\t" | 1259 "por %%mm4, %%mm3 \n\t" |
1261 "por %%mm5, %%mm3\n\t" | 1260 "por %%mm5, %%mm3 \n\t" |
1262 MOVNTQ" %%mm0, %0\n\t" | 1261 MOVNTQ" %%mm0, %0 \n\t" |
1263 MOVNTQ" %%mm3, 8%0\n\t" | 1262 MOVNTQ" %%mm3, 8%0 \n\t" |
1264 :"=m"(*d) | 1263 :"=m"(*d) |
1265 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | 1264 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) |
1266 :"memory"); | 1265 :"memory"); |
1267 d += 16; | 1266 d += 16; |
1268 s += 4; | 1267 s += 4; |
1269 } | 1268 } |
1270 __asm __volatile(SFENCE:::"memory"); | 1269 __asm __volatile(SFENCE:::"memory"); |
1271 __asm __volatile(EMMS:::"memory"); | 1270 __asm __volatile(EMMS:::"memory"); |
1272 #endif | 1271 #endif |
1273 while(s < end) | 1272 while (s < end) |
1274 { | 1273 { |
1275 #if 0 //slightly slower on athlon | 1274 #if 0 //slightly slower on athlon |
1276 int bgr= *s++; | 1275 int bgr= *s++; |
1277 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9); | 1276 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9); |
1278 #else | 1277 #else |
1279 register uint16_t bgr; | 1278 register uint16_t bgr; |
1280 bgr = *s++; | 1279 bgr = *s++; |
1281 #ifdef WORDS_BIGENDIAN | 1280 #ifdef WORDS_BIGENDIAN |
1282 *d++ = 0; | 1281 *d++ = 0; |
1283 *d++ = (bgr&0x7C00)>>7; | 1282 *d++ = (bgr&0x7C00)>>7; |
1284 *d++ = (bgr&0x3E0)>>2; | 1283 *d++ = (bgr&0x3E0)>>2; |
1285 *d++ = (bgr&0x1F)<<3; | 1284 *d++ = (bgr&0x1F)<<3; |
1286 #else | 1285 #else |
1287 *d++ = (bgr&0x1F)<<3; | 1286 *d++ = (bgr&0x1F)<<3; |
1288 *d++ = (bgr&0x3E0)>>2; | 1287 *d++ = (bgr&0x3E0)>>2; |
1289 *d++ = (bgr&0x7C00)>>7; | 1288 *d++ = (bgr&0x7C00)>>7; |
1290 *d++ = 0; | 1289 *d++ = 0; |
1291 #endif | 1290 #endif |
1292 | 1291 |
1293 #endif | 1292 #endif |
1294 } | 1293 } |
1295 } | 1294 } |
1296 | 1295 |
1297 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) | 1296 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) |
1298 { | 1297 { |
1299 const uint16_t *end; | 1298 const uint16_t *end; |
1300 #ifdef HAVE_MMX | 1299 #ifdef HAVE_MMX |
1301 const uint16_t *mm_end; | 1300 const uint16_t *mm_end; |
1302 #endif | 1301 #endif |
1303 uint8_t *d = (uint8_t *)dst; | 1302 uint8_t *d = (uint8_t *)dst; |
1304 const uint16_t *s = (uint16_t *)src; | 1303 const uint16_t *s = (uint16_t *)src; |
1305 end = s + src_size/2; | 1304 end = s + src_size/2; |
1306 #ifdef HAVE_MMX | 1305 #ifdef HAVE_MMX |
1307 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 1306 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1308 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | 1307 __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); |
1309 mm_end = end - 3; | 1308 mm_end = end - 3; |
1310 while(s < mm_end) | 1309 while (s < mm_end) |
1311 { | 1310 { |
1312 __asm __volatile( | 1311 __asm __volatile( |
1313 PREFETCH" 32%1\n\t" | 1312 PREFETCH" 32%1 \n\t" |
1314 "movq %1, %%mm0\n\t" | 1313 "movq %1, %%mm0 \n\t" |
1315 "movq %1, %%mm1\n\t" | 1314 "movq %1, %%mm1 \n\t" |
1316 "movq %1, %%mm2\n\t" | 1315 "movq %1, %%mm2 \n\t" |
1317 "pand %2, %%mm0\n\t" | 1316 "pand %2, %%mm0 \n\t" |
1318 "pand %3, %%mm1\n\t" | 1317 "pand %3, %%mm1 \n\t" |
1319 "pand %4, %%mm2\n\t" | 1318 "pand %4, %%mm2 \n\t" |
1320 "psllq $3, %%mm0\n\t" | 1319 "psllq $3, %%mm0 \n\t" |
1321 "psrlq $3, %%mm1\n\t" | 1320 "psrlq $3, %%mm1 \n\t" |
1322 "psrlq $8, %%mm2\n\t" | 1321 "psrlq $8, %%mm2 \n\t" |
1323 "movq %%mm0, %%mm3\n\t" | 1322 "movq %%mm0, %%mm3 \n\t" |
1324 "movq %%mm1, %%mm4\n\t" | 1323 "movq %%mm1, %%mm4 \n\t" |
1325 "movq %%mm2, %%mm5\n\t" | 1324 "movq %%mm2, %%mm5 \n\t" |
1326 "punpcklwd %%mm7, %%mm0\n\t" | 1325 "punpcklwd %%mm7, %%mm0 \n\t" |
1327 "punpcklwd %%mm7, %%mm1\n\t" | 1326 "punpcklwd %%mm7, %%mm1 \n\t" |
1328 "punpcklwd %%mm7, %%mm2\n\t" | 1327 "punpcklwd %%mm7, %%mm2 \n\t" |
1329 "punpckhwd %%mm7, %%mm3\n\t" | 1328 "punpckhwd %%mm7, %%mm3 \n\t" |
1330 "punpckhwd %%mm7, %%mm4\n\t" | 1329 "punpckhwd %%mm7, %%mm4 \n\t" |
1331 "punpckhwd %%mm7, %%mm5\n\t" | 1330 "punpckhwd %%mm7, %%mm5 \n\t" |
1332 "psllq $8, %%mm1\n\t" | 1331 "psllq $8, %%mm1 \n\t" |
1333 "psllq $16, %%mm2\n\t" | 1332 "psllq $16, %%mm2 \n\t" |
1334 "por %%mm1, %%mm0\n\t" | 1333 "por %%mm1, %%mm0 \n\t" |
1335 "por %%mm2, %%mm0\n\t" | 1334 "por %%mm2, %%mm0 \n\t" |
1336 "psllq $8, %%mm4\n\t" | 1335 "psllq $8, %%mm4 \n\t" |
1337 "psllq $16, %%mm5\n\t" | 1336 "psllq $16, %%mm5 \n\t" |
1338 "por %%mm4, %%mm3\n\t" | 1337 "por %%mm4, %%mm3 \n\t" |
1339 "por %%mm5, %%mm3\n\t" | 1338 "por %%mm5, %%mm3 \n\t" |
1340 MOVNTQ" %%mm0, %0\n\t" | 1339 MOVNTQ" %%mm0, %0 \n\t" |
1341 MOVNTQ" %%mm3, 8%0\n\t" | 1340 MOVNTQ" %%mm3, 8%0 \n\t" |
1342 :"=m"(*d) | 1341 :"=m"(*d) |
1343 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | 1342 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) |
1344 :"memory"); | 1343 :"memory"); |
1345 d += 16; | 1344 d += 16; |
1346 s += 4; | 1345 s += 4; |
1347 } | 1346 } |
1348 __asm __volatile(SFENCE:::"memory"); | 1347 __asm __volatile(SFENCE:::"memory"); |
1349 __asm __volatile(EMMS:::"memory"); | 1348 __asm __volatile(EMMS:::"memory"); |
1350 #endif | 1349 #endif |
1351 while(s < end) | 1350 while (s < end) |
1352 { | 1351 { |
1353 register uint16_t bgr; | 1352 register uint16_t bgr; |
1354 bgr = *s++; | 1353 bgr = *s++; |
1355 #ifdef WORDS_BIGENDIAN | 1354 #ifdef WORDS_BIGENDIAN |
1356 *d++ = 0; | 1355 *d++ = 0; |
1357 *d++ = (bgr&0xF800)>>8; | 1356 *d++ = (bgr&0xF800)>>8; |
1358 *d++ = (bgr&0x7E0)>>3; | 1357 *d++ = (bgr&0x7E0)>>3; |
1359 *d++ = (bgr&0x1F)<<3; | 1358 *d++ = (bgr&0x1F)<<3; |
1360 #else | 1359 #else |
1361 *d++ = (bgr&0x1F)<<3; | 1360 *d++ = (bgr&0x1F)<<3; |
1362 *d++ = (bgr&0x7E0)>>3; | 1361 *d++ = (bgr&0x7E0)>>3; |
1363 *d++ = (bgr&0xF800)>>8; | 1362 *d++ = (bgr&0xF800)>>8; |
1364 *d++ = 0; | 1363 *d++ = 0; |
1365 #endif | 1364 #endif |
1366 } | 1365 } |
1367 } | 1366 } |
1368 | 1367 |
1369 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | 1368 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) |
1370 { | 1369 { |
1371 long idx = 15 - src_size; | 1370 long idx = 15 - src_size; |
1372 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx; | 1371 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx; |
1373 #ifdef HAVE_MMX | 1372 #ifdef HAVE_MMX |
1374 __asm __volatile( | 1373 __asm __volatile( |
1375 "test %0, %0 \n\t" | 1374 "test %0, %0 \n\t" |
1376 "jns 2f \n\t" | 1375 "jns 2f \n\t" |
1377 PREFETCH" (%1, %0) \n\t" | 1376 PREFETCH" (%1, %0) \n\t" |
1378 "movq %3, %%mm7 \n\t" | 1377 "movq %3, %%mm7 \n\t" |
1379 "pxor %4, %%mm7 \n\t" | 1378 "pxor %4, %%mm7 \n\t" |
1380 "movq %%mm7, %%mm6 \n\t" | 1379 "movq %%mm7, %%mm6 \n\t" |
1381 "pxor %5, %%mm7 \n\t" | 1380 "pxor %5, %%mm7 \n\t" |
1382 ASMALIGN(4) | 1381 ASMALIGN(4) |
1383 "1: \n\t" | 1382 "1: \n\t" |
1384 PREFETCH" 32(%1, %0) \n\t" | 1383 PREFETCH" 32(%1, %0) \n\t" |
1385 "movq (%1, %0), %%mm0 \n\t" | 1384 "movq (%1, %0), %%mm0 \n\t" |
1386 "movq 8(%1, %0), %%mm1 \n\t" | 1385 "movq 8(%1, %0), %%mm1 \n\t" |
1387 # ifdef HAVE_MMX2 | 1386 # ifdef HAVE_MMX2 |
1388 "pshufw $177, %%mm0, %%mm3 \n\t" | 1387 "pshufw $177, %%mm0, %%mm3 \n\t" |
1389 "pshufw $177, %%mm1, %%mm5 \n\t" | 1388 "pshufw $177, %%mm1, %%mm5 \n\t" |
1390 "pand %%mm7, %%mm0 \n\t" | 1389 "pand %%mm7, %%mm0 \n\t" |
1391 "pand %%mm6, %%mm3 \n\t" | 1390 "pand %%mm6, %%mm3 \n\t" |
1392 "pand %%mm7, %%mm1 \n\t" | 1391 "pand %%mm7, %%mm1 \n\t" |
1393 "pand %%mm6, %%mm5 \n\t" | 1392 "pand %%mm6, %%mm5 \n\t" |
1394 "por %%mm3, %%mm0 \n\t" | 1393 "por %%mm3, %%mm0 \n\t" |
1395 "por %%mm5, %%mm1 \n\t" | 1394 "por %%mm5, %%mm1 \n\t" |
1396 # else | 1395 # else |
1397 "movq %%mm0, %%mm2 \n\t" | 1396 "movq %%mm0, %%mm2 \n\t" |
1398 "movq %%mm1, %%mm4 \n\t" | 1397 "movq %%mm1, %%mm4 \n\t" |
1399 "pand %%mm7, %%mm0 \n\t" | 1398 "pand %%mm7, %%mm0 \n\t" |
1400 "pand %%mm6, %%mm2 \n\t" | 1399 "pand %%mm6, %%mm2 \n\t" |
1401 "pand %%mm7, %%mm1 \n\t" | 1400 "pand %%mm7, %%mm1 \n\t" |
1402 "pand %%mm6, %%mm4 \n\t" | 1401 "pand %%mm6, %%mm4 \n\t" |
1403 "movq %%mm2, %%mm3 \n\t" | 1402 "movq %%mm2, %%mm3 \n\t" |
1404 "movq %%mm4, %%mm5 \n\t" | 1403 "movq %%mm4, %%mm5 \n\t" |
1405 "pslld $16, %%mm2 \n\t" | 1404 "pslld $16, %%mm2 \n\t" |
1406 "psrld $16, %%mm3 \n\t" | 1405 "psrld $16, %%mm3 \n\t" |
1407 "pslld $16, %%mm4 \n\t" | 1406 "pslld $16, %%mm4 \n\t" |
1408 "psrld $16, %%mm5 \n\t" | 1407 "psrld $16, %%mm5 \n\t" |
1409 "por %%mm2, %%mm0 \n\t" | 1408 "por %%mm2, %%mm0 \n\t" |
1410 "por %%mm4, %%mm1 \n\t" | 1409 "por %%mm4, %%mm1 \n\t" |
1411 "por %%mm3, %%mm0 \n\t" | 1410 "por %%mm3, %%mm0 \n\t" |
1412 "por %%mm5, %%mm1 \n\t" | 1411 "por %%mm5, %%mm1 \n\t" |
1413 # endif | 1412 # endif |
1414 MOVNTQ" %%mm0, (%2, %0) \n\t" | 1413 MOVNTQ" %%mm0, (%2, %0) \n\t" |
1415 MOVNTQ" %%mm1, 8(%2, %0) \n\t" | 1414 MOVNTQ" %%mm1, 8(%2, %0) \n\t" |
1416 "add $16, %0 \n\t" | 1415 "add $16, %0 \n\t" |
1417 "js 1b \n\t" | 1416 "js 1b \n\t" |
1418 SFENCE" \n\t" | 1417 SFENCE" \n\t" |
1419 EMMS" \n\t" | 1418 EMMS" \n\t" |
1420 "2: \n\t" | 1419 "2: \n\t" |
1421 : "+&r"(idx) | 1420 : "+&r"(idx) |
1422 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) | 1421 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) |
1423 : "memory"); | 1422 : "memory"); |
1424 #endif | 1423 #endif |
1425 for (; idx<15; idx+=4) { | 1424 for (; idx<15; idx+=4) { |
1426 register int v = *(uint32_t *)&s[idx], g = v & 0xff00ff00; | 1425 register int v = *(uint32_t *)&s[idx], g = v & 0xff00ff00; |
1427 v &= 0xff00ff; | 1426 v &= 0xff00ff; |
1428 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); | 1427 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); |
1429 } | 1428 } |
1430 } | 1429 } |
1431 | 1430 |
1432 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) | 1431 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
1433 { | 1432 { |
1434 unsigned i; | 1433 unsigned i; |
1435 #ifdef HAVE_MMX | 1434 #ifdef HAVE_MMX |
1436 long mmx_size= 23 - src_size; | 1435 long mmx_size= 23 - src_size; |
1437 asm volatile ( | 1436 asm volatile ( |
1438 "test %%"REG_a", %%"REG_a" \n\t" | 1437 "test %%"REG_a", %%"REG_a" \n\t" |
1439 "jns 2f \n\t" | 1438 "jns 2f \n\t" |
1440 "movq "MANGLE(mask24r)", %%mm5 \n\t" | 1439 "movq "MANGLE(mask24r)", %%mm5 \n\t" |
1441 "movq "MANGLE(mask24g)", %%mm6 \n\t" | 1440 "movq "MANGLE(mask24g)", %%mm6 \n\t" |
1442 "movq "MANGLE(mask24b)", %%mm7 \n\t" | 1441 "movq "MANGLE(mask24b)", %%mm7 \n\t" |
1443 ASMALIGN(4) | 1442 ASMALIGN(4) |
1444 "1: \n\t" | 1443 "1: \n\t" |
1445 PREFETCH" 32(%1, %%"REG_a") \n\t" | 1444 PREFETCH" 32(%1, %%"REG_a") \n\t" |
1446 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | 1445 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG |
1447 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG | 1446 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG |
1448 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B | 1447 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B |
1449 "psllq $16, %%mm0 \n\t" // 00 BGR BGR | 1448 "psllq $16, %%mm0 \n\t" // 00 BGR BGR |
1450 "pand %%mm5, %%mm0 \n\t" | 1449 "pand %%mm5, %%mm0 \n\t" |
1451 "pand %%mm6, %%mm1 \n\t" | 1450 "pand %%mm6, %%mm1 \n\t" |
1452 "pand %%mm7, %%mm2 \n\t" | 1451 "pand %%mm7, %%mm2 \n\t" |
1453 "por %%mm0, %%mm1 \n\t" | 1452 "por %%mm0, %%mm1 \n\t" |
1454 "por %%mm2, %%mm1 \n\t" | 1453 "por %%mm2, %%mm1 \n\t" |
1455 "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | 1454 "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG |
1456 MOVNTQ" %%mm1, (%2, %%"REG_a")\n\t" // RGB RGB RG | 1455 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG |
1457 "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B | 1456 "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B |
1458 "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR | 1457 "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR |
1459 "pand %%mm7, %%mm0 \n\t" | 1458 "pand %%mm7, %%mm0 \n\t" |
1460 "pand %%mm5, %%mm1 \n\t" | 1459 "pand %%mm5, %%mm1 \n\t" |
1461 "pand %%mm6, %%mm2 \n\t" | 1460 "pand %%mm6, %%mm2 \n\t" |
1462 "por %%mm0, %%mm1 \n\t" | 1461 "por %%mm0, %%mm1 \n\t" |
1463 "por %%mm2, %%mm1 \n\t" | 1462 "por %%mm2, %%mm1 \n\t" |
1464 "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B | 1463 "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B |
1465 MOVNTQ" %%mm1, 8(%2, %%"REG_a")\n\t" // B RGB RGB R | 1464 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R |
1466 "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR | 1465 "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR |
1467 "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG | 1466 "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG |
1468 "pand %%mm6, %%mm0 \n\t" | 1467 "pand %%mm6, %%mm0 \n\t" |
1469 "pand %%mm7, %%mm1 \n\t" | 1468 "pand %%mm7, %%mm1 \n\t" |
1470 "pand %%mm5, %%mm2 \n\t" | 1469 "pand %%mm5, %%mm2 \n\t" |
1471 "por %%mm0, %%mm1 \n\t" | 1470 "por %%mm0, %%mm1 \n\t" |
1472 "por %%mm2, %%mm1 \n\t" | 1471 "por %%mm2, %%mm1 \n\t" |
1473 MOVNTQ" %%mm1, 16(%2, %%"REG_a")\n\t" | 1472 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" |
1474 "add $24, %%"REG_a" \n\t" | 1473 "add $24, %%"REG_a" \n\t" |
1475 " js 1b \n\t" | 1474 " js 1b \n\t" |
1476 "2: \n\t" | 1475 "2: \n\t" |
1477 : "+a" (mmx_size) | 1476 : "+a" (mmx_size) |
1478 : "r" (src-mmx_size), "r"(dst-mmx_size) | 1477 : "r" (src-mmx_size), "r"(dst-mmx_size) |
1479 ); | 1478 ); |
1480 | 1479 |
1481 __asm __volatile(SFENCE:::"memory"); | 1480 __asm __volatile(SFENCE:::"memory"); |
1482 __asm __volatile(EMMS:::"memory"); | 1481 __asm __volatile(EMMS:::"memory"); |
1483 | 1482 |
1484 if(mmx_size==23) return; //finihsed, was multiple of 8 | 1483 if (mmx_size==23) return; //finihsed, was multiple of 8 |
1485 | 1484 |
1486 src+= src_size; | 1485 src+= src_size; |
1487 dst+= src_size; | 1486 dst+= src_size; |
1488 src_size= 23-mmx_size; | 1487 src_size= 23-mmx_size; |
1489 src-= src_size; | 1488 src-= src_size; |
1490 dst-= src_size; | 1489 dst-= src_size; |
1491 #endif | 1490 #endif |
1492 for(i=0; i<src_size; i+=3) | 1491 for (i=0; i<src_size; i+=3) |
1493 { | 1492 { |
1494 register uint8_t x; | 1493 register uint8_t x; |
1495 x = src[i + 2]; | 1494 x = src[i + 2]; |
1496 dst[i + 1] = src[i + 1]; | 1495 dst[i + 1] = src[i + 1]; |
1497 dst[i + 2] = src[i + 0]; | 1496 dst[i + 2] = src[i + 0]; |
1498 dst[i + 0] = x; | 1497 dst[i + 0] = x; |
1499 } | 1498 } |
1500 } | 1499 } |
1501 | 1500 |
1502 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1501 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1503 long width, long height, | 1502 long width, long height, |
1504 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | 1503 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
1505 { | 1504 { |
1506 long y; | 1505 long y; |
1507 const long chromWidth= width>>1; | 1506 const long chromWidth= width>>1; |
1508 for(y=0; y<height; y++) | 1507 for (y=0; y<height; y++) |
1509 { | 1508 { |
1510 #ifdef HAVE_MMX | 1509 #ifdef HAVE_MMX |
1511 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) | 1510 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
1512 asm volatile( | 1511 asm volatile( |
1513 "xor %%"REG_a", %%"REG_a" \n\t" | 1512 "xor %%"REG_a", %%"REG_a" \n\t" |
1514 ASMALIGN(4) | 1513 ASMALIGN(4) |
1515 "1: \n\t" | 1514 "1: \n\t" |
1516 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | 1515 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" |
1517 PREFETCH" 32(%2, %%"REG_a") \n\t" | 1516 PREFETCH" 32(%2, %%"REG_a") \n\t" |
1518 PREFETCH" 32(%3, %%"REG_a") \n\t" | 1517 PREFETCH" 32(%3, %%"REG_a") \n\t" |
1519 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | 1518 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) |
1520 "movq %%mm0, %%mm2 \n\t" // U(0) | 1519 "movq %%mm0, %%mm2 \n\t" // U(0) |
1521 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | 1520 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) |
1522 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | 1521 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
1523 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | 1522 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) |
1524 | 1523 |
1525 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) | 1524 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
1526 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | 1525 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) |
1527 "movq %%mm3, %%mm4 \n\t" // Y(0) | 1526 "movq %%mm3, %%mm4 \n\t" // Y(0) |
1528 "movq %%mm5, %%mm6 \n\t" // Y(8) | 1527 "movq %%mm5, %%mm6 \n\t" // Y(8) |
1529 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | 1528 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) |
1530 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | 1529 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) |
1531 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | 1530 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) |
1532 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | 1531 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) |
1533 | 1532 |
1534 MOVNTQ" %%mm3, (%0, %%"REG_a", 4)\n\t" | 1533 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" |
1535 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t" | 1534 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" |
1536 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4)\n\t" | 1535 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" |
1537 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t" | 1536 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" |
1538 | 1537 |
1539 "add $8, %%"REG_a" \n\t" | 1538 "add $8, %%"REG_a" \n\t" |
1540 "cmp %4, %%"REG_a" \n\t" | 1539 "cmp %4, %%"REG_a" \n\t" |
1541 " jb 1b \n\t" | 1540 " jb 1b \n\t" |
1542 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | 1541 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) |
1543 : "%"REG_a | 1542 : "%"REG_a |
1544 ); | 1543 ); |
1545 #else | 1544 #else |
1546 | 1545 |
1547 #if defined ARCH_ALPHA && defined HAVE_MVI | 1546 #if defined ARCH_ALPHA && defined HAVE_MVI |
1548 #define pl2yuy2(n) \ | 1547 #define pl2yuy2(n) \ |
1549 y1 = yc[n]; \ | 1548 y1 = yc[n]; \ |
1550 y2 = yc2[n]; \ | 1549 y2 = yc2[n]; \ |
1551 u = uc[n]; \ | 1550 u = uc[n]; \ |
1552 v = vc[n]; \ | 1551 v = vc[n]; \ |
1553 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ | 1552 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ |
1554 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ | 1553 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ |
1555 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ | 1554 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ |
1556 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ | 1555 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ |
1557 yuv1 = (u << 8) + (v << 24); \ | 1556 yuv1 = (u << 8) + (v << 24); \ |
1558 yuv2 = yuv1 + y2; \ | 1557 yuv2 = yuv1 + y2; \ |
1559 yuv1 += y1; \ | 1558 yuv1 += y1; \ |
1560 qdst[n] = yuv1; \ | 1559 qdst[n] = yuv1; \ |
1561 qdst2[n] = yuv2; | 1560 qdst2[n] = yuv2; |
1562 | 1561 |
1563 int i; | 1562 int i; |
1564 uint64_t *qdst = (uint64_t *) dst; | 1563 uint64_t *qdst = (uint64_t *) dst; |
1565 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); | 1564 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); |
1566 const uint32_t *yc = (uint32_t *) ysrc; | 1565 const uint32_t *yc = (uint32_t *) ysrc; |
1567 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); | 1566 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); |
1568 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; | 1567 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; |
1569 for(i = 0; i < chromWidth; i += 8){ | 1568 for (i = 0; i < chromWidth; i += 8){ |
1570 uint64_t y1, y2, yuv1, yuv2; | 1569 uint64_t y1, y2, yuv1, yuv2; |
1571 uint64_t u, v; | 1570 uint64_t u, v; |
1572 /* Prefetch */ | 1571 /* Prefetch */ |
1573 asm("ldq $31,64(%0)" :: "r"(yc)); | 1572 asm("ldq $31,64(%0)" :: "r"(yc)); |
1574 asm("ldq $31,64(%0)" :: "r"(yc2)); | 1573 asm("ldq $31,64(%0)" :: "r"(yc2)); |
1575 asm("ldq $31,64(%0)" :: "r"(uc)); | 1574 asm("ldq $31,64(%0)" :: "r"(uc)); |
1576 asm("ldq $31,64(%0)" :: "r"(vc)); | 1575 asm("ldq $31,64(%0)" :: "r"(vc)); |
1577 | 1576 |
1578 pl2yuy2(0); | 1577 pl2yuy2(0); |
1579 pl2yuy2(1); | 1578 pl2yuy2(1); |
1580 pl2yuy2(2); | 1579 pl2yuy2(2); |
1581 pl2yuy2(3); | 1580 pl2yuy2(3); |
1582 | 1581 |
1583 yc += 4; | 1582 yc += 4; |
1584 yc2 += 4; | 1583 yc2 += 4; |
1585 uc += 4; | 1584 uc += 4; |
1586 vc += 4; | 1585 vc += 4; |
1587 qdst += 4; | 1586 qdst += 4; |
1588 qdst2 += 4; | 1587 qdst2 += 4; |
1589 } | 1588 } |
1590 y++; | 1589 y++; |
1591 ysrc += lumStride; | 1590 ysrc += lumStride; |
1592 dst += dstStride; | 1591 dst += dstStride; |
1593 | 1592 |
1594 #elif __WORDSIZE >= 64 | 1593 #elif __WORDSIZE >= 64 |
1595 int i; | 1594 int i; |
1596 uint64_t *ldst = (uint64_t *) dst; | 1595 uint64_t *ldst = (uint64_t *) dst; |
1597 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1596 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1598 for(i = 0; i < chromWidth; i += 2){ | 1597 for (i = 0; i < chromWidth; i += 2){ |
1599 uint64_t k, l; | 1598 uint64_t k, l; |
1600 k = yc[0] + (uc[0] << 8) + | 1599 k = yc[0] + (uc[0] << 8) + |
1601 (yc[1] << 16) + (vc[0] << 24); | 1600 (yc[1] << 16) + (vc[0] << 24); |
1602 l = yc[2] + (uc[1] << 8) + | 1601 l = yc[2] + (uc[1] << 8) + |
1603 (yc[3] << 16) + (vc[1] << 24); | 1602 (yc[3] << 16) + (vc[1] << 24); |
1604 *ldst++ = k + (l << 32); | 1603 *ldst++ = k + (l << 32); |
1605 yc += 4; | 1604 yc += 4; |
1606 uc += 2; | 1605 uc += 2; |
1607 vc += 2; | 1606 vc += 2; |
1608 } | 1607 } |
1609 | 1608 |
1610 #else | 1609 #else |
1611 int i, *idst = (int32_t *) dst; | 1610 int i, *idst = (int32_t *) dst; |
1612 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1611 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1613 for(i = 0; i < chromWidth; i++){ | 1612 for (i = 0; i < chromWidth; i++){ |
1614 #ifdef WORDS_BIGENDIAN | 1613 #ifdef WORDS_BIGENDIAN |
1615 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + | 1614 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + |
1616 (yc[1] << 8) + (vc[0] << 0); | 1615 (yc[1] << 8) + (vc[0] << 0); |
1617 #else | 1616 #else |
1618 *idst++ = yc[0] + (uc[0] << 8) + | 1617 *idst++ = yc[0] + (uc[0] << 8) + |
1619 (yc[1] << 16) + (vc[0] << 24); | 1618 (yc[1] << 16) + (vc[0] << 24); |
1620 #endif | 1619 #endif |
1621 yc += 2; | 1620 yc += 2; |
1622 uc++; | 1621 uc++; |
1623 vc++; | 1622 vc++; |
1624 } | 1623 } |
1625 #endif | 1624 #endif |
1626 #endif | 1625 #endif |
1627 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) | 1626 if ((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) |
1628 { | 1627 { |
1629 usrc += chromStride; | 1628 usrc += chromStride; |
1630 vsrc += chromStride; | 1629 vsrc += chromStride; |
1631 } | 1630 } |
1632 ysrc += lumStride; | 1631 ysrc += lumStride; |
1633 dst += dstStride; | 1632 dst += dstStride; |
1634 } | 1633 } |
1635 #ifdef HAVE_MMX | 1634 #ifdef HAVE_MMX |
1636 asm( EMMS" \n\t" | 1635 asm( EMMS" \n\t" |
1637 SFENCE" \n\t" | 1636 SFENCE" \n\t" |
1638 :::"memory"); | 1637 :::"memory"); |
1639 #endif | 1638 #endif |
1640 } | 1639 } |
1641 | 1640 |
1642 /** | 1641 /** |
1643 * | 1642 * |
1644 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | 1643 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a |
1645 * problem for anyone then tell me, and ill fix it) | 1644 * problem for anyone then tell me, and ill fix it) |
1646 */ | 1645 */ |
1647 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1646 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1648 long width, long height, | 1647 long width, long height, |
1649 long lumStride, long chromStride, long dstStride) | 1648 long lumStride, long chromStride, long dstStride) |
1650 { | 1649 { |
1651 //FIXME interpolate chroma | 1650 //FIXME interpolate chroma |
1652 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); | 1651 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
1653 } | 1652 } |
1654 | 1653 |
1655 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1654 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1656 long width, long height, | 1655 long width, long height, |
1657 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | 1656 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
1658 { | 1657 { |
1659 long y; | 1658 long y; |
1660 const long chromWidth= width>>1; | 1659 const long chromWidth= width>>1; |
1661 for(y=0; y<height; y++) | 1660 for (y=0; y<height; y++) |
1662 { | 1661 { |
1663 #ifdef HAVE_MMX | 1662 #ifdef HAVE_MMX |
1664 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) | 1663 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
1665 asm volatile( | 1664 asm volatile( |
1666 "xor %%"REG_a", %%"REG_a" \n\t" | 1665 "xor %%"REG_a", %%"REG_a" \n\t" |
1667 ASMALIGN(4) | 1666 ASMALIGN(4) |
1668 "1: \n\t" | 1667 "1: \n\t" |
1669 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | 1668 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" |
1670 PREFETCH" 32(%2, %%"REG_a") \n\t" | 1669 PREFETCH" 32(%2, %%"REG_a") \n\t" |
1671 PREFETCH" 32(%3, %%"REG_a") \n\t" | 1670 PREFETCH" 32(%3, %%"REG_a") \n\t" |
1672 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | 1671 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) |
1673 "movq %%mm0, %%mm2 \n\t" // U(0) | 1672 "movq %%mm0, %%mm2 \n\t" // U(0) |
1674 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | 1673 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) |
1675 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | 1674 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
1676 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | 1675 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) |
1677 | 1676 |
1678 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) | 1677 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
1679 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | 1678 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) |
1680 "movq %%mm0, %%mm4 \n\t" // Y(0) | 1679 "movq %%mm0, %%mm4 \n\t" // Y(0) |
1681 "movq %%mm2, %%mm6 \n\t" // Y(8) | 1680 "movq %%mm2, %%mm6 \n\t" // Y(8) |
1682 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) | 1681 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) |
1683 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) | 1682 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) |
1684 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) | 1683 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) |
1685 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) | 1684 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) |
1686 | 1685 |
1687 MOVNTQ" %%mm0, (%0, %%"REG_a", 4)\n\t" | 1686 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" |
1688 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t" | 1687 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" |
1689 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4)\n\t" | 1688 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" |
1690 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t" | 1689 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" |
1691 | 1690 |
1692 "add $8, %%"REG_a" \n\t" | 1691 "add $8, %%"REG_a" \n\t" |
1693 "cmp %4, %%"REG_a" \n\t" | 1692 "cmp %4, %%"REG_a" \n\t" |
1694 " jb 1b \n\t" | 1693 " jb 1b \n\t" |
1695 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | 1694 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) |
1696 : "%"REG_a | 1695 : "%"REG_a |
1697 ); | 1696 ); |
1698 #else | 1697 #else |
1699 //FIXME adapt the alpha asm code from yv12->yuy2 | 1698 //FIXME adapt the alpha asm code from yv12->yuy2 |
1700 | 1699 |
1701 #if __WORDSIZE >= 64 | 1700 #if __WORDSIZE >= 64 |
1702 int i; | 1701 int i; |
1703 uint64_t *ldst = (uint64_t *) dst; | 1702 uint64_t *ldst = (uint64_t *) dst; |
1704 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1703 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1705 for(i = 0; i < chromWidth; i += 2){ | 1704 for (i = 0; i < chromWidth; i += 2){ |
1706 uint64_t k, l; | 1705 uint64_t k, l; |
1707 k = uc[0] + (yc[0] << 8) + | 1706 k = uc[0] + (yc[0] << 8) + |
1708 (vc[0] << 16) + (yc[1] << 24); | 1707 (vc[0] << 16) + (yc[1] << 24); |
1709 l = uc[1] + (yc[2] << 8) + | 1708 l = uc[1] + (yc[2] << 8) + |
1710 (vc[1] << 16) + (yc[3] << 24); | 1709 (vc[1] << 16) + (yc[3] << 24); |
1711 *ldst++ = k + (l << 32); | 1710 *ldst++ = k + (l << 32); |
1712 yc += 4; | 1711 yc += 4; |
1713 uc += 2; | 1712 uc += 2; |
1714 vc += 2; | 1713 vc += 2; |
1715 } | 1714 } |
1716 | 1715 |
1717 #else | 1716 #else |
1718 int i, *idst = (int32_t *) dst; | 1717 int i, *idst = (int32_t *) dst; |
1719 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1718 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1720 for(i = 0; i < chromWidth; i++){ | 1719 for (i = 0; i < chromWidth; i++){ |
1721 #ifdef WORDS_BIGENDIAN | 1720 #ifdef WORDS_BIGENDIAN |
1722 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + | 1721 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + |
1723 (vc[0] << 8) + (yc[1] << 0); | 1722 (vc[0] << 8) + (yc[1] << 0); |
1724 #else | 1723 #else |
1725 *idst++ = uc[0] + (yc[0] << 8) + | 1724 *idst++ = uc[0] + (yc[0] << 8) + |
1726 (vc[0] << 16) + (yc[1] << 24); | 1725 (vc[0] << 16) + (yc[1] << 24); |
1727 #endif | 1726 #endif |
1728 yc += 2; | 1727 yc += 2; |
1729 uc++; | 1728 uc++; |
1730 vc++; | 1729 vc++; |
1731 } | 1730 } |
1732 #endif | 1731 #endif |
1733 #endif | 1732 #endif |
1734 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) | 1733 if ((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) |
1735 { | 1734 { |
1736 usrc += chromStride; | 1735 usrc += chromStride; |
1737 vsrc += chromStride; | 1736 vsrc += chromStride; |
1738 } | 1737 } |
1739 ysrc += lumStride; | 1738 ysrc += lumStride; |
1740 dst += dstStride; | 1739 dst += dstStride; |
1741 } | 1740 } |
1742 #ifdef HAVE_MMX | 1741 #ifdef HAVE_MMX |
1743 asm( EMMS" \n\t" | 1742 asm( EMMS" \n\t" |
1744 SFENCE" \n\t" | 1743 SFENCE" \n\t" |
1745 :::"memory"); | 1744 :::"memory"); |
1746 #endif | 1745 #endif |
1747 } | 1746 } |
1748 | 1747 |
1749 /** | 1748 /** |
1750 * | 1749 * |
1751 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | 1750 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a |
1752 * problem for anyone then tell me, and ill fix it) | 1751 * problem for anyone then tell me, and ill fix it) |
1753 */ | 1752 */ |
1754 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1753 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1755 long width, long height, | 1754 long width, long height, |
1756 long lumStride, long chromStride, long dstStride) | 1755 long lumStride, long chromStride, long dstStride) |
1757 { | 1756 { |
1758 //FIXME interpolate chroma | 1757 //FIXME interpolate chroma |
1759 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); | 1758 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
1760 } | 1759 } |
1761 | 1760 |
1762 /** | 1761 /** |
1763 * | 1762 * |
1764 * width should be a multiple of 16 | 1763 * width should be a multiple of 16 |
1765 */ | 1764 */ |
1766 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1765 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1767 long width, long height, | 1766 long width, long height, |
1768 long lumStride, long chromStride, long dstStride) | 1767 long lumStride, long chromStride, long dstStride) |
1769 { | 1768 { |
1770 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); | 1769 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); |
1771 } | 1770 } |
1772 | 1771 |
1773 /** | 1772 /** |
1774 * | 1773 * |
1775 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | 1774 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a |
1776 * problem for anyone then tell me, and ill fix it) | 1775 * problem for anyone then tell me, and ill fix it) |
1777 */ | 1776 */ |
1778 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1777 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
1779 long width, long height, | 1778 long width, long height, |
1780 long lumStride, long chromStride, long srcStride) | 1779 long lumStride, long chromStride, long srcStride) |
1781 { | 1780 { |
1782 long y; | 1781 long y; |
1783 const long chromWidth= width>>1; | 1782 const long chromWidth= width>>1; |
1784 for(y=0; y<height; y+=2) | 1783 for (y=0; y<height; y+=2) |
1785 { | 1784 { |
1786 #ifdef HAVE_MMX | 1785 #ifdef HAVE_MMX |
1787 asm volatile( | 1786 asm volatile( |
1788 "xor %%"REG_a", %%"REG_a" \n\t" | 1787 "xor %%"REG_a", %%"REG_a" \n\t" |
1789 "pcmpeqw %%mm7, %%mm7 \n\t" | 1788 "pcmpeqw %%mm7, %%mm7 \n\t" |
1790 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | 1789 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
1791 ASMALIGN(4) | 1790 ASMALIGN(4) |
1792 "1: \n\t" | 1791 "1: \n\t" |
1793 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | 1792 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
1794 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | 1793 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
1795 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) | 1794 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) |
1796 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) | 1795 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) |
1797 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) | 1796 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) |
1798 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) | 1797 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) |
1799 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) | 1798 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) |
1800 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | 1799 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) |
1801 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | 1800 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) |
1802 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | 1801 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
1803 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | 1802 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) |
1804 | 1803 |
1805 MOVNTQ" %%mm2, (%1, %%"REG_a", 2)\n\t" | 1804 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" |
1806 | 1805 |
1807 "movq 16(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(8) | 1806 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) |
1808 "movq 24(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(12) | 1807 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) |
1809 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) | 1808 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) |
1810 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) | 1809 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) |
1811 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) | 1810 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) |
1812 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) | 1811 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) |
1813 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | 1812 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) |
1814 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | 1813 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) |
1815 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | 1814 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) |
1816 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | 1815 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) |
1817 | 1816 |
1818 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2)\n\t" | 1817 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" |
1819 | 1818 |
1820 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) | 1819 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
1821 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | 1820 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) |
1822 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | 1821 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) |
1823 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | 1822 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) |
1824 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | 1823 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) |
1825 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | 1824 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) |
1826 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | 1825 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) |
1827 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | 1826 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) |
1828 | 1827 |
1829 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" | 1828 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" |
1830 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" | 1829 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" |
1831 | 1830 |
1832 "add $8, %%"REG_a" \n\t" | 1831 "add $8, %%"REG_a" \n\t" |
1833 "cmp %4, %%"REG_a" \n\t" | 1832 "cmp %4, %%"REG_a" \n\t" |
1834 " jb 1b \n\t" | 1833 " jb 1b \n\t" |
1835 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | 1834 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
1836 : "memory", "%"REG_a | 1835 : "memory", "%"REG_a |
1837 ); | 1836 ); |
1838 | 1837 |
1839 ydst += lumStride; | 1838 ydst += lumStride; |
1840 src += srcStride; | 1839 src += srcStride; |
1841 | 1840 |
1842 asm volatile( | 1841 asm volatile( |
1843 "xor %%"REG_a", %%"REG_a" \n\t" | 1842 "xor %%"REG_a", %%"REG_a" \n\t" |
1844 ASMALIGN(4) | 1843 ASMALIGN(4) |
1845 "1: \n\t" | 1844 "1: \n\t" |
1846 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | 1845 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" |
1847 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | 1846 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) |
1848 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) | 1847 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) |
1849 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8) | 1848 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) |
1850 "movq 24(%0, %%"REG_a", 4), %%mm3\n\t" // YUYV YUYV(12) | 1849 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) |
1851 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | 1850 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) |
1852 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | 1851 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) |
1853 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | 1852 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) |
1854 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | 1853 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) |
1855 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | 1854 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) |
1856 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | 1855 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) |
1857 | 1856 |
1858 MOVNTQ" %%mm0, (%1, %%"REG_a", 2)\n\t" | 1857 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" |
1859 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2)\n\t" | 1858 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" |
1860 | 1859 |
1861 "add $8, %%"REG_a" \n\t" | 1860 "add $8, %%"REG_a" \n\t" |
1862 "cmp %4, %%"REG_a" \n\t" | 1861 "cmp %4, %%"REG_a" \n\t" |
1863 " jb 1b \n\t" | 1862 " jb 1b \n\t" |
1864 | 1863 |
1865 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | 1864 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
1866 : "memory", "%"REG_a | 1865 : "memory", "%"REG_a |
1867 ); | 1866 ); |
1868 #else | 1867 #else |
1869 long i; | 1868 long i; |
1870 for(i=0; i<chromWidth; i++) | 1869 for (i=0; i<chromWidth; i++) |
1871 { | 1870 { |
1872 ydst[2*i+0] = src[4*i+0]; | 1871 ydst[2*i+0] = src[4*i+0]; |
1873 udst[i] = src[4*i+1]; | 1872 udst[i] = src[4*i+1]; |
1874 ydst[2*i+1] = src[4*i+2]; | 1873 ydst[2*i+1] = src[4*i+2]; |
1875 vdst[i] = src[4*i+3]; | 1874 vdst[i] = src[4*i+3]; |
1876 } | 1875 } |
1877 ydst += lumStride; | 1876 ydst += lumStride; |
1878 src += srcStride; | 1877 src += srcStride; |
1879 | 1878 |
1880 for(i=0; i<chromWidth; i++) | 1879 for (i=0; i<chromWidth; i++) |
1881 { | 1880 { |
1882 ydst[2*i+0] = src[4*i+0]; | 1881 ydst[2*i+0] = src[4*i+0]; |
1883 ydst[2*i+1] = src[4*i+2]; | 1882 ydst[2*i+1] = src[4*i+2]; |
1884 } | 1883 } |
1885 #endif | 1884 #endif |
1886 udst += chromStride; | 1885 udst += chromStride; |
1887 vdst += chromStride; | 1886 vdst += chromStride; |
1888 ydst += lumStride; | 1887 ydst += lumStride; |
1889 src += srcStride; | 1888 src += srcStride; |
1890 } | 1889 } |
1891 #ifdef HAVE_MMX | 1890 #ifdef HAVE_MMX |
1892 asm volatile( EMMS" \n\t" | 1891 asm volatile( EMMS" \n\t" |
1893 SFENCE" \n\t" | 1892 SFENCE" \n\t" |
1894 :::"memory"); | 1893 :::"memory"); |
1895 #endif | 1894 #endif |
1896 } | 1895 } |
1897 | 1896 |
1898 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, | 1897 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, |
1899 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1898 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
1900 long width, long height, long lumStride, long chromStride) | 1899 long width, long height, long lumStride, long chromStride) |
1901 { | 1900 { |
1902 /* Y Plane */ | 1901 /* Y Plane */ |
1903 memcpy(ydst, ysrc, width*height); | 1902 memcpy(ydst, ysrc, width*height); |
1904 | 1903 |
1905 /* XXX: implement upscaling for U,V */ | 1904 /* XXX: implement upscaling for U,V */ |
1906 } | 1905 } |
1907 | 1906 |
1908 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) | 1907 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) |
1909 { | 1908 { |
1910 long x,y; | 1909 long x,y; |
1911 | 1910 |
1912 dst[0]= src[0]; | 1911 dst[0]= src[0]; |
1913 | 1912 |
1914 // first line | 1913 // first line |
1915 for(x=0; x<srcWidth-1; x++){ | 1914 for (x=0; x<srcWidth-1; x++){ |
1916 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | 1915 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
1917 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | 1916 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
1918 } | 1917 } |
1919 dst[2*srcWidth-1]= src[srcWidth-1]; | 1918 dst[2*srcWidth-1]= src[srcWidth-1]; |
1920 | 1919 |
1921 dst+= dstStride; | 1920 dst+= dstStride; |
1922 | 1921 |
1923 for(y=1; y<srcHeight; y++){ | 1922 for (y=1; y<srcHeight; y++){ |
1924 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1923 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1925 const long mmxSize= srcWidth&~15; | 1924 const long mmxSize= srcWidth&~15; |
1926 asm volatile( | 1925 asm volatile( |
1927 "mov %4, %%"REG_a" \n\t" | 1926 "mov %4, %%"REG_a" \n\t" |
1928 "1: \n\t" | 1927 "1: \n\t" |
1929 "movq (%0, %%"REG_a"), %%mm0 \n\t" | 1928 "movq (%0, %%"REG_a"), %%mm0 \n\t" |
1930 "movq (%1, %%"REG_a"), %%mm1 \n\t" | 1929 "movq (%1, %%"REG_a"), %%mm1 \n\t" |
1931 "movq 1(%0, %%"REG_a"), %%mm2 \n\t" | 1930 "movq 1(%0, %%"REG_a"), %%mm2 \n\t" |
1932 "movq 1(%1, %%"REG_a"), %%mm3 \n\t" | 1931 "movq 1(%1, %%"REG_a"), %%mm3 \n\t" |
1933 "movq -1(%0, %%"REG_a"), %%mm4 \n\t" | 1932 "movq -1(%0, %%"REG_a"), %%mm4 \n\t" |
1934 "movq -1(%1, %%"REG_a"), %%mm5 \n\t" | 1933 "movq -1(%1, %%"REG_a"), %%mm5 \n\t" |
1935 PAVGB" %%mm0, %%mm5 \n\t" | 1934 PAVGB" %%mm0, %%mm5 \n\t" |
1936 PAVGB" %%mm0, %%mm3 \n\t" | 1935 PAVGB" %%mm0, %%mm3 \n\t" |
1937 PAVGB" %%mm0, %%mm5 \n\t" | 1936 PAVGB" %%mm0, %%mm5 \n\t" |
1938 PAVGB" %%mm0, %%mm3 \n\t" | 1937 PAVGB" %%mm0, %%mm3 \n\t" |
1939 PAVGB" %%mm1, %%mm4 \n\t" | 1938 PAVGB" %%mm1, %%mm4 \n\t" |
1940 PAVGB" %%mm1, %%mm2 \n\t" | 1939 PAVGB" %%mm1, %%mm2 \n\t" |
1941 PAVGB" %%mm1, %%mm4 \n\t" | 1940 PAVGB" %%mm1, %%mm4 \n\t" |
1942 PAVGB" %%mm1, %%mm2 \n\t" | 1941 PAVGB" %%mm1, %%mm2 \n\t" |
1943 "movq %%mm5, %%mm7 \n\t" | 1942 "movq %%mm5, %%mm7 \n\t" |
1944 "movq %%mm4, %%mm6 \n\t" | 1943 "movq %%mm4, %%mm6 \n\t" |
1945 "punpcklbw %%mm3, %%mm5 \n\t" | 1944 "punpcklbw %%mm3, %%mm5 \n\t" |
1946 "punpckhbw %%mm3, %%mm7 \n\t" | 1945 "punpckhbw %%mm3, %%mm7 \n\t" |
1947 "punpcklbw %%mm2, %%mm4 \n\t" | 1946 "punpcklbw %%mm2, %%mm4 \n\t" |
1948 "punpckhbw %%mm2, %%mm6 \n\t" | 1947 "punpckhbw %%mm2, %%mm6 \n\t" |
1949 #if 1 | 1948 #if 1 |
1950 MOVNTQ" %%mm5, (%2, %%"REG_a", 2)\n\t" | 1949 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" |
1951 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2)\n\t" | 1950 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" |
1952 MOVNTQ" %%mm4, (%3, %%"REG_a", 2)\n\t" | 1951 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" |
1953 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2)\n\t" | 1952 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" |
1954 #else | 1953 #else |
1955 "movq %%mm5, (%2, %%"REG_a", 2) \n\t" | 1954 "movq %%mm5, (%2, %%"REG_a", 2) \n\t" |
1956 "movq %%mm7, 8(%2, %%"REG_a", 2)\n\t" | 1955 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" |
1957 "movq %%mm4, (%3, %%"REG_a", 2) \n\t" | 1956 "movq %%mm4, (%3, %%"REG_a", 2) \n\t" |
1958 "movq %%mm6, 8(%3, %%"REG_a", 2)\n\t" | 1957 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" |
1959 #endif | 1958 #endif |
1960 "add $8, %%"REG_a" \n\t" | 1959 "add $8, %%"REG_a" \n\t" |
1961 " js 1b \n\t" | 1960 " js 1b \n\t" |
1962 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), | 1961 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), |
1963 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), | 1962 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), |
1964 "g" (-mmxSize) | 1963 "g" (-mmxSize) |
1965 : "%"REG_a | 1964 : "%"REG_a |
1966 | 1965 |
1967 ); | 1966 ); |
1968 #else | 1967 #else |
1969 const long mmxSize=1; | 1968 const long mmxSize=1; |
1970 #endif | 1969 #endif |
1971 dst[0 ]= (3*src[0] + src[srcStride])>>2; | 1970 dst[0 ]= (3*src[0] + src[srcStride])>>2; |
1972 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | 1971 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; |
1973 | 1972 |
1974 for(x=mmxSize-1; x<srcWidth-1; x++){ | 1973 for (x=mmxSize-1; x<srcWidth-1; x++){ |
1975 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; | 1974 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
1976 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; | 1975 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
1977 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; | 1976 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
1978 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; | 1977 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
1979 } | 1978 } |
1980 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; | 1979 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; |
1981 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; | 1980 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; |
1982 | 1981 |
1983 dst+=dstStride*2; | 1982 dst+=dstStride*2; |
1984 src+=srcStride; | 1983 src+=srcStride; |
1985 } | 1984 } |
1986 | 1985 |
1987 // last line | 1986 // last line |
1988 #if 1 | 1987 #if 1 |
1989 dst[0]= src[0]; | 1988 dst[0]= src[0]; |
1990 | 1989 |
1991 for(x=0; x<srcWidth-1; x++){ | 1990 for (x=0; x<srcWidth-1; x++){ |
1992 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | 1991 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
1993 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | 1992 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
1994 } | 1993 } |
1995 dst[2*srcWidth-1]= src[srcWidth-1]; | 1994 dst[2*srcWidth-1]= src[srcWidth-1]; |
1996 #else | 1995 #else |
1997 for(x=0; x<srcWidth; x++){ | 1996 for (x=0; x<srcWidth; x++){ |
1998 dst[2*x+0]= | 1997 dst[2*x+0]= |
1999 dst[2*x+1]= src[x]; | 1998 dst[2*x+1]= src[x]; |
2000 } | 1999 } |
2001 #endif | 2000 #endif |
2002 | 2001 |
2003 #ifdef HAVE_MMX | 2002 #ifdef HAVE_MMX |
2004 asm volatile( EMMS" \n\t" | 2003 asm volatile( EMMS" \n\t" |
2005 SFENCE" \n\t" | 2004 SFENCE" \n\t" |
2006 :::"memory"); | 2005 :::"memory"); |
2007 #endif | 2006 #endif |
2008 } | 2007 } |
2009 | 2008 |
2010 /** | 2009 /** |
2011 * | 2010 * |
2012 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | 2011 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a |
2013 * problem for anyone then tell me, and ill fix it) | 2012 * problem for anyone then tell me, and ill fix it) |
2014 * chrominance data is only taken from every secound line others are ignored FIXME write HQ version | 2013 * chrominance data is only taken from every secound line others are ignored FIXME write HQ version |
2015 */ | 2014 */ |
2016 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 2015 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
2017 long width, long height, | 2016 long width, long height, |
2018 long lumStride, long chromStride, long srcStride) | 2017 long lumStride, long chromStride, long srcStride) |
2019 { | 2018 { |
2020 long y; | 2019 long y; |
2021 const long chromWidth= width>>1; | 2020 const long chromWidth= width>>1; |
2022 for(y=0; y<height; y+=2) | 2021 for (y=0; y<height; y+=2) |
2023 { | 2022 { |
2024 #ifdef HAVE_MMX | 2023 #ifdef HAVE_MMX |
2025 asm volatile( | 2024 asm volatile( |
2026 "xorl %%eax, %%eax \n\t" | 2025 "xorl %%eax, %%eax \n\t" |
2027 "pcmpeqw %%mm7, %%mm7 \n\t" | 2026 "pcmpeqw %%mm7, %%mm7 \n\t" |
2028 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | 2027 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
2029 ASMALIGN(4) | 2028 ASMALIGN(4) |
2030 "1: \n\t" | 2029 "1: \n\t" |
2031 PREFETCH" 64(%0, %%eax, 4) \n\t" | 2030 PREFETCH" 64(%0, %%eax, 4) \n\t" |
2032 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) | 2031 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) |
2033 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) | 2032 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) |
2034 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) | 2033 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) |
2035 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) | 2034 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) |
2036 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) | 2035 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) |
2037 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) | 2036 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) |
2038 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | 2037 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) |
2039 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | 2038 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) |
2040 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | 2039 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) |
2041 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | 2040 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) |
2042 | 2041 |
2043 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t" | 2042 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t" |
2044 | 2043 |
2045 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8) | 2044 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8) |
2046 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12) | 2045 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12) |
2047 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) | 2046 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) |
2048 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) | 2047 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) |
2049 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) | 2048 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) |
2050 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) | 2049 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) |
2051 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | 2050 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) |
2052 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | 2051 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) |
2053 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | 2052 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) |
2054 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | 2053 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) |
2055 | 2054 |
2056 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t" | 2055 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t" |
2057 | 2056 |
2058 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) | 2057 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
2059 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | 2058 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) |
2060 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | 2059 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) |
2061 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | 2060 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) |
2062 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | 2061 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) |
2063 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | 2062 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) |
2064 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | 2063 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) |
2065 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | 2064 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) |
2066 | 2065 |
2067 MOVNTQ" %%mm0, (%3, %%eax) \n\t" | 2066 MOVNTQ" %%mm0, (%3, %%eax) \n\t" |
2068 MOVNTQ" %%mm2, (%2, %%eax) \n\t" | 2067 MOVNTQ" %%mm2, (%2, %%eax) \n\t" |
2069 | 2068 |
2070 "addl $8, %%eax \n\t" | 2069 "addl $8, %%eax \n\t" |
2071 "cmpl %4, %%eax \n\t" | 2070 "cmpl %4, %%eax \n\t" |
2072 " jb 1b \n\t" | 2071 " jb 1b \n\t" |
2073 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | 2072 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
2074 : "memory", "%eax" | 2073 : "memory", "%eax" |
2075 ); | 2074 ); |
2076 | 2075 |
2077 ydst += lumStride; | 2076 ydst += lumStride; |
2078 src += srcStride; | 2077 src += srcStride; |
2079 | 2078 |
2080 asm volatile( | 2079 asm volatile( |
2081 "xorl %%eax, %%eax \n\t" | 2080 "xorl %%eax, %%eax \n\t" |
2082 ASMALIGN(4) | 2081 ASMALIGN(4) |
2083 "1: \n\t" | 2082 "1: \n\t" |
2084 PREFETCH" 64(%0, %%eax, 4) \n\t" | 2083 PREFETCH" 64(%0, %%eax, 4) \n\t" |
2085 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) | 2084 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) |
2086 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) | 2085 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) |
2087 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) | 2086 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) |
2088 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12) | 2087 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12) |
2089 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | 2088 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) |
2090 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | 2089 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) |
2091 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | 2090 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) |
2092 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | 2091 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) |
2093 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | 2092 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) |
2094 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | 2093 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) |
2095 | 2094 |
2096 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t" | 2095 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t" |
2097 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t" | 2096 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t" |
2098 | 2097 |
2099 "addl $8, %%eax \n\t" | 2098 "addl $8, %%eax \n\t" |
2100 "cmpl %4, %%eax \n\t" | 2099 "cmpl %4, %%eax \n\t" |
2101 " jb 1b \n\t" | 2100 " jb 1b \n\t" |
2102 | 2101 |
2103 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | 2102 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
2104 : "memory", "%eax" | 2103 : "memory", "%eax" |
2105 ); | 2104 ); |
2106 #else | 2105 #else |
2107 long i; | 2106 long i; |
2108 for(i=0; i<chromWidth; i++) | 2107 for (i=0; i<chromWidth; i++) |
2109 { | 2108 { |
2110 udst[i] = src[4*i+0]; | 2109 udst[i] = src[4*i+0]; |
2111 ydst[2*i+0] = src[4*i+1]; | 2110 ydst[2*i+0] = src[4*i+1]; |
2112 vdst[i] = src[4*i+2]; | 2111 vdst[i] = src[4*i+2]; |
2113 ydst[2*i+1] = src[4*i+3]; | 2112 ydst[2*i+1] = src[4*i+3]; |
2114 } | 2113 } |
2115 ydst += lumStride; | 2114 ydst += lumStride; |
2116 src += srcStride; | 2115 src += srcStride; |
2117 | 2116 |
2118 for(i=0; i<chromWidth; i++) | 2117 for (i=0; i<chromWidth; i++) |
2119 { | 2118 { |
2120 ydst[2*i+0] = src[4*i+1]; | 2119 ydst[2*i+0] = src[4*i+1]; |
2121 ydst[2*i+1] = src[4*i+3]; | 2120 ydst[2*i+1] = src[4*i+3]; |
2122 } | 2121 } |
2123 #endif | 2122 #endif |
2124 udst += chromStride; | 2123 udst += chromStride; |
2125 vdst += chromStride; | 2124 vdst += chromStride; |
2126 ydst += lumStride; | 2125 ydst += lumStride; |
2127 src += srcStride; | 2126 src += srcStride; |
2128 } | 2127 } |
2129 #ifdef HAVE_MMX | 2128 #ifdef HAVE_MMX |
2130 asm volatile( EMMS" \n\t" | 2129 asm volatile( EMMS" \n\t" |
2131 SFENCE" \n\t" | 2130 SFENCE" \n\t" |
2132 :::"memory"); | 2131 :::"memory"); |
2133 #endif | 2132 #endif |
2134 } | 2133 } |
2135 | 2134 |
2136 /** | 2135 /** |
2137 * | 2136 * |
2138 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a | 2137 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a |
2139 * problem for anyone then tell me, and ill fix it) | 2138 * problem for anyone then tell me, and ill fix it) |
2140 * chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version | 2139 * chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version |
2141 */ | 2140 */ |
2142 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 2141 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
2143 long width, long height, | 2142 long width, long height, |
2144 long lumStride, long chromStride, long srcStride) | 2143 long lumStride, long chromStride, long srcStride) |
2145 { | 2144 { |
2146 long y; | 2145 long y; |
2147 const long chromWidth= width>>1; | 2146 const long chromWidth= width>>1; |
2148 #ifdef HAVE_MMX | 2147 #ifdef HAVE_MMX |
2149 for(y=0; y<height-2; y+=2) | 2148 for (y=0; y<height-2; y+=2) |
2150 { | 2149 { |
2151 long i; | 2150 long i; |
2152 for(i=0; i<2; i++) | 2151 for (i=0; i<2; i++) |
2153 { | 2152 { |
2154 asm volatile( | 2153 asm volatile( |
2155 "mov %2, %%"REG_a" \n\t" | 2154 "mov %2, %%"REG_a" \n\t" |
2156 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | 2155 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
2157 "movq "MANGLE(w1111)", %%mm5 \n\t" | 2156 "movq "MANGLE(w1111)", %%mm5 \n\t" |
2158 "pxor %%mm7, %%mm7 \n\t" | 2157 "pxor %%mm7, %%mm7 \n\t" |
2159 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t" | 2158 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" |
2160 ASMALIGN(4) | 2159 ASMALIGN(4) |
2161 "1: \n\t" | 2160 "1: \n\t" |
2162 PREFETCH" 64(%0, %%"REG_d") \n\t" | 2161 PREFETCH" 64(%0, %%"REG_d") \n\t" |
2163 "movd (%0, %%"REG_d"), %%mm0 \n\t" | 2162 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
2164 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" | 2163 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" |
2165 "punpcklbw %%mm7, %%mm0 \n\t" | 2164 "punpcklbw %%mm7, %%mm0 \n\t" |
2166 "punpcklbw %%mm7, %%mm1 \n\t" | 2165 "punpcklbw %%mm7, %%mm1 \n\t" |
2167 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" | 2166 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" |
2168 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" | 2167 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" |
2169 "punpcklbw %%mm7, %%mm2 \n\t" | 2168 "punpcklbw %%mm7, %%mm2 \n\t" |
2170 "punpcklbw %%mm7, %%mm3 \n\t" | 2169 "punpcklbw %%mm7, %%mm3 \n\t" |
2171 "pmaddwd %%mm6, %%mm0 \n\t" | 2170 "pmaddwd %%mm6, %%mm0 \n\t" |
2172 "pmaddwd %%mm6, %%mm1 \n\t" | 2171 "pmaddwd %%mm6, %%mm1 \n\t" |
2173 "pmaddwd %%mm6, %%mm2 \n\t" | 2172 "pmaddwd %%mm6, %%mm2 \n\t" |
2174 "pmaddwd %%mm6, %%mm3 \n\t" | 2173 "pmaddwd %%mm6, %%mm3 \n\t" |
2175 #ifndef FAST_BGR2YV12 | 2174 #ifndef FAST_BGR2YV12 |
2176 "psrad $8, %%mm0 \n\t" | 2175 "psrad $8, %%mm0 \n\t" |
2177 "psrad $8, %%mm1 \n\t" | 2176 "psrad $8, %%mm1 \n\t" |
2178 "psrad $8, %%mm2 \n\t" | 2177 "psrad $8, %%mm2 \n\t" |
2179 "psrad $8, %%mm3 \n\t" | 2178 "psrad $8, %%mm3 \n\t" |
2180 #endif | 2179 #endif |
2181 "packssdw %%mm1, %%mm0 \n\t" | 2180 "packssdw %%mm1, %%mm0 \n\t" |
2182 "packssdw %%mm3, %%mm2 \n\t" | 2181 "packssdw %%mm3, %%mm2 \n\t" |
2183 "pmaddwd %%mm5, %%mm0 \n\t" | 2182 "pmaddwd %%mm5, %%mm0 \n\t" |
2184 "pmaddwd %%mm5, %%mm2 \n\t" | 2183 "pmaddwd %%mm5, %%mm2 \n\t" |
2185 "packssdw %%mm2, %%mm0 \n\t" | 2184 "packssdw %%mm2, %%mm0 \n\t" |
2186 "psraw $7, %%mm0 \n\t" | 2185 "psraw $7, %%mm0 \n\t" |
2187 | 2186 |
2188 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" | 2187 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2189 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" | 2188 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" |
2190 "punpcklbw %%mm7, %%mm4 \n\t" | 2189 "punpcklbw %%mm7, %%mm4 \n\t" |
2191 "punpcklbw %%mm7, %%mm1 \n\t" | 2190 "punpcklbw %%mm7, %%mm1 \n\t" |
2192 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" | 2191 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" |
2193 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" | 2192 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" |
2194 "punpcklbw %%mm7, %%mm2 \n\t" | 2193 "punpcklbw %%mm7, %%mm2 \n\t" |
2195 "punpcklbw %%mm7, %%mm3 \n\t" | 2194 "punpcklbw %%mm7, %%mm3 \n\t" |
2196 "pmaddwd %%mm6, %%mm4 \n\t" | 2195 "pmaddwd %%mm6, %%mm4 \n\t" |
2197 "pmaddwd %%mm6, %%mm1 \n\t" | 2196 "pmaddwd %%mm6, %%mm1 \n\t" |
2198 "pmaddwd %%mm6, %%mm2 \n\t" | 2197 "pmaddwd %%mm6, %%mm2 \n\t" |
2199 "pmaddwd %%mm6, %%mm3 \n\t" | 2198 "pmaddwd %%mm6, %%mm3 \n\t" |
2200 #ifndef FAST_BGR2YV12 | 2199 #ifndef FAST_BGR2YV12 |
2201 "psrad $8, %%mm4 \n\t" | 2200 "psrad $8, %%mm4 \n\t" |
2202 "psrad $8, %%mm1 \n\t" | 2201 "psrad $8, %%mm1 \n\t" |
2203 "psrad $8, %%mm2 \n\t" | 2202 "psrad $8, %%mm2 \n\t" |
2204 "psrad $8, %%mm3 \n\t" | 2203 "psrad $8, %%mm3 \n\t" |
2205 #endif | 2204 #endif |
2206 "packssdw %%mm1, %%mm4 \n\t" | 2205 "packssdw %%mm1, %%mm4 \n\t" |
2207 "packssdw %%mm3, %%mm2 \n\t" | 2206 "packssdw %%mm3, %%mm2 \n\t" |
2208 "pmaddwd %%mm5, %%mm4 \n\t" | 2207 "pmaddwd %%mm5, %%mm4 \n\t" |
2209 "pmaddwd %%mm5, %%mm2 \n\t" | 2208 "pmaddwd %%mm5, %%mm2 \n\t" |
2210 "add $24, %%"REG_d" \n\t" | 2209 "add $24, %%"REG_d" \n\t" |
2211 "packssdw %%mm2, %%mm4 \n\t" | 2210 "packssdw %%mm2, %%mm4 \n\t" |
2212 "psraw $7, %%mm4 \n\t" | 2211 "psraw $7, %%mm4 \n\t" |
2213 | 2212 |
2214 "packuswb %%mm4, %%mm0 \n\t" | 2213 "packuswb %%mm4, %%mm0 \n\t" |
2215 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | 2214 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
2216 | 2215 |
2217 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | 2216 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" |
2218 "add $8, %%"REG_a" \n\t" | 2217 "add $8, %%"REG_a" \n\t" |
2219 " js 1b \n\t" | 2218 " js 1b \n\t" |
2220 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) | 2219 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) |
2221 : "%"REG_a, "%"REG_d | 2220 : "%"REG_a, "%"REG_d |
2222 ); | 2221 ); |
2223 ydst += lumStride; | 2222 ydst += lumStride; |
2224 src += srcStride; | 2223 src += srcStride; |
2225 } | 2224 } |
2226 src -= srcStride*2; | 2225 src -= srcStride*2; |
2227 asm volatile( | 2226 asm volatile( |
2228 "mov %4, %%"REG_a" \n\t" | 2227 "mov %4, %%"REG_a" \n\t" |
2229 "movq "MANGLE(w1111)", %%mm5 \n\t" | 2228 "movq "MANGLE(w1111)", %%mm5 \n\t" |
2230 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | 2229 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" |
2231 "pxor %%mm7, %%mm7 \n\t" | 2230 "pxor %%mm7, %%mm7 \n\t" |
2232 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t" | 2231 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" |
2233 "add %%"REG_d", %%"REG_d" \n\t" | 2232 "add %%"REG_d", %%"REG_d" \n\t" |
2234 ASMALIGN(4) | 2233 ASMALIGN(4) |
2235 "1: \n\t" | 2234 "1: \n\t" |
2236 PREFETCH" 64(%0, %%"REG_d") \n\t" | 2235 PREFETCH" 64(%0, %%"REG_d") \n\t" |
2237 PREFETCH" 64(%1, %%"REG_d") \n\t" | 2236 PREFETCH" 64(%1, %%"REG_d") \n\t" |
2238 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2237 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
2239 "movq (%0, %%"REG_d"), %%mm0 \n\t" | 2238 "movq (%0, %%"REG_d"), %%mm0 \n\t" |
2240 "movq (%1, %%"REG_d"), %%mm1 \n\t" | 2239 "movq (%1, %%"REG_d"), %%mm1 \n\t" |
2241 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" | 2240 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" |
2242 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" | 2241 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" |
2243 PAVGB" %%mm1, %%mm0 \n\t" | 2242 PAVGB" %%mm1, %%mm0 \n\t" |
2244 PAVGB" %%mm3, %%mm2 \n\t" | 2243 PAVGB" %%mm3, %%mm2 \n\t" |
2245 "movq %%mm0, %%mm1 \n\t" | 2244 "movq %%mm0, %%mm1 \n\t" |
2246 "movq %%mm2, %%mm3 \n\t" | 2245 "movq %%mm2, %%mm3 \n\t" |
2247 "psrlq $24, %%mm0 \n\t" | 2246 "psrlq $24, %%mm0 \n\t" |
2248 "psrlq $24, %%mm2 \n\t" | 2247 "psrlq $24, %%mm2 \n\t" |
2249 PAVGB" %%mm1, %%mm0 \n\t" | 2248 PAVGB" %%mm1, %%mm0 \n\t" |
2250 PAVGB" %%mm3, %%mm2 \n\t" | 2249 PAVGB" %%mm3, %%mm2 \n\t" |
2251 "punpcklbw %%mm7, %%mm0 \n\t" | 2250 "punpcklbw %%mm7, %%mm0 \n\t" |
2252 "punpcklbw %%mm7, %%mm2 \n\t" | 2251 "punpcklbw %%mm7, %%mm2 \n\t" |
2253 #else | 2252 #else |
2254 "movd (%0, %%"REG_d"), %%mm0 \n\t" | 2253 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
2255 "movd (%1, %%"REG_d"), %%mm1 \n\t" | 2254 "movd (%1, %%"REG_d"), %%mm1 \n\t" |
2256 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" | 2255 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" |
2257 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" | 2256 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" |
2258 "punpcklbw %%mm7, %%mm0 \n\t" | 2257 "punpcklbw %%mm7, %%mm0 \n\t" |
2259 "punpcklbw %%mm7, %%mm1 \n\t" | 2258 "punpcklbw %%mm7, %%mm1 \n\t" |
2260 "punpcklbw %%mm7, %%mm2 \n\t" | 2259 "punpcklbw %%mm7, %%mm2 \n\t" |
2261 "punpcklbw %%mm7, %%mm3 \n\t" | 2260 "punpcklbw %%mm7, %%mm3 \n\t" |
2262 "paddw %%mm1, %%mm0 \n\t" | 2261 "paddw %%mm1, %%mm0 \n\t" |
2263 "paddw %%mm3, %%mm2 \n\t" | 2262 "paddw %%mm3, %%mm2 \n\t" |
2264 "paddw %%mm2, %%mm0 \n\t" | 2263 "paddw %%mm2, %%mm0 \n\t" |
2265 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" | 2264 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" |
2266 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" | 2265 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" |
2267 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" | 2266 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" |
2268 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" | 2267 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" |
2269 "punpcklbw %%mm7, %%mm4 \n\t" | 2268 "punpcklbw %%mm7, %%mm4 \n\t" |
2270 "punpcklbw %%mm7, %%mm1 \n\t" | 2269 "punpcklbw %%mm7, %%mm1 \n\t" |
2271 "punpcklbw %%mm7, %%mm2 \n\t" | 2270 "punpcklbw %%mm7, %%mm2 \n\t" |
2272 "punpcklbw %%mm7, %%mm3 \n\t" | 2271 "punpcklbw %%mm7, %%mm3 \n\t" |
2273 "paddw %%mm1, %%mm4 \n\t" | 2272 "paddw %%mm1, %%mm4 \n\t" |
2274 "paddw %%mm3, %%mm2 \n\t" | 2273 "paddw %%mm3, %%mm2 \n\t" |
2275 "paddw %%mm4, %%mm2 \n\t" | 2274 "paddw %%mm4, %%mm2 \n\t" |
2276 "psrlw $2, %%mm0 \n\t" | 2275 "psrlw $2, %%mm0 \n\t" |
2277 "psrlw $2, %%mm2 \n\t" | 2276 "psrlw $2, %%mm2 \n\t" |
2278 #endif | 2277 #endif |
2279 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | 2278 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
2280 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | 2279 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" |
2281 | 2280 |
2282 "pmaddwd %%mm0, %%mm1 \n\t" | 2281 "pmaddwd %%mm0, %%mm1 \n\t" |
2283 "pmaddwd %%mm2, %%mm3 \n\t" | 2282 "pmaddwd %%mm2, %%mm3 \n\t" |
2284 "pmaddwd %%mm6, %%mm0 \n\t" | 2283 "pmaddwd %%mm6, %%mm0 \n\t" |
2285 "pmaddwd %%mm6, %%mm2 \n\t" | 2284 "pmaddwd %%mm6, %%mm2 \n\t" |
2286 #ifndef FAST_BGR2YV12 | 2285 #ifndef FAST_BGR2YV12 |
2287 "psrad $8, %%mm0 \n\t" | 2286 "psrad $8, %%mm0 \n\t" |
2288 "psrad $8, %%mm1 \n\t" | 2287 "psrad $8, %%mm1 \n\t" |
2289 "psrad $8, %%mm2 \n\t" | 2288 "psrad $8, %%mm2 \n\t" |
2290 "psrad $8, %%mm3 \n\t" | 2289 "psrad $8, %%mm3 \n\t" |
2291 #endif | 2290 #endif |
2292 "packssdw %%mm2, %%mm0 \n\t" | 2291 "packssdw %%mm2, %%mm0 \n\t" |
2293 "packssdw %%mm3, %%mm1 \n\t" | 2292 "packssdw %%mm3, %%mm1 \n\t" |
2294 "pmaddwd %%mm5, %%mm0 \n\t" | 2293 "pmaddwd %%mm5, %%mm0 \n\t" |
2295 "pmaddwd %%mm5, %%mm1 \n\t" | 2294 "pmaddwd %%mm5, %%mm1 \n\t" |
2296 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | 2295 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 |
2297 "psraw $7, %%mm0 \n\t" | 2296 "psraw $7, %%mm0 \n\t" |
2298 | 2297 |
2299 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2298 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
2300 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" | 2299 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
2301 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" | 2300 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" |
2302 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" | 2301 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" |
2303 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" | 2302 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" |
2304 PAVGB" %%mm1, %%mm4 \n\t" | 2303 PAVGB" %%mm1, %%mm4 \n\t" |
2305 PAVGB" %%mm3, %%mm2 \n\t" | 2304 PAVGB" %%mm3, %%mm2 \n\t" |
2306 "movq %%mm4, %%mm1 \n\t" | 2305 "movq %%mm4, %%mm1 \n\t" |
2307 "movq %%mm2, %%mm3 \n\t" | 2306 "movq %%mm2, %%mm3 \n\t" |
2308 "psrlq $24, %%mm4 \n\t" | 2307 "psrlq $24, %%mm4 \n\t" |
2309 "psrlq $24, %%mm2 \n\t" | 2308 "psrlq $24, %%mm2 \n\t" |
2310 PAVGB" %%mm1, %%mm4 \n\t" | 2309 PAVGB" %%mm1, %%mm4 \n\t" |
2311 PAVGB" %%mm3, %%mm2 \n\t" | 2310 PAVGB" %%mm3, %%mm2 \n\t" |
2312 "punpcklbw %%mm7, %%mm4 \n\t" | 2311 "punpcklbw %%mm7, %%mm4 \n\t" |
2313 "punpcklbw %%mm7, %%mm2 \n\t" | 2312 "punpcklbw %%mm7, %%mm2 \n\t" |
2314 #else | 2313 #else |
2315 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" | 2314 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2316 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" | 2315 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" |
2317 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" | 2316 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" |
2318 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" | 2317 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" |
2319 "punpcklbw %%mm7, %%mm4 \n\t" | 2318 "punpcklbw %%mm7, %%mm4 \n\t" |
2320 "punpcklbw %%mm7, %%mm1 \n\t" | 2319 "punpcklbw %%mm7, %%mm1 \n\t" |
2321 "punpcklbw %%mm7, %%mm2 \n\t" | 2320 "punpcklbw %%mm7, %%mm2 \n\t" |
2322 "punpcklbw %%mm7, %%mm3 \n\t" | 2321 "punpcklbw %%mm7, %%mm3 \n\t" |
2323 "paddw %%mm1, %%mm4 \n\t" | 2322 "paddw %%mm1, %%mm4 \n\t" |
2324 "paddw %%mm3, %%mm2 \n\t" | 2323 "paddw %%mm3, %%mm2 \n\t" |
2325 "paddw %%mm2, %%mm4 \n\t" | 2324 "paddw %%mm2, %%mm4 \n\t" |
2326 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" | 2325 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" |
2327 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" | 2326 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" |
2328 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" | 2327 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" |
2329 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" | 2328 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" |
2330 "punpcklbw %%mm7, %%mm5 \n\t" | 2329 "punpcklbw %%mm7, %%mm5 \n\t" |
2331 "punpcklbw %%mm7, %%mm1 \n\t" | 2330 "punpcklbw %%mm7, %%mm1 \n\t" |
2332 "punpcklbw %%mm7, %%mm2 \n\t" | 2331 "punpcklbw %%mm7, %%mm2 \n\t" |
2333 "punpcklbw %%mm7, %%mm3 \n\t" | 2332 "punpcklbw %%mm7, %%mm3 \n\t" |
2334 "paddw %%mm1, %%mm5 \n\t" | 2333 "paddw %%mm1, %%mm5 \n\t" |
2335 "paddw %%mm3, %%mm2 \n\t" | 2334 "paddw %%mm3, %%mm2 \n\t" |
2336 "paddw %%mm5, %%mm2 \n\t" | 2335 "paddw %%mm5, %%mm2 \n\t" |
2337 "movq "MANGLE(w1111)", %%mm5 \n\t" | 2336 "movq "MANGLE(w1111)", %%mm5 \n\t" |
2338 "psrlw $2, %%mm4 \n\t" | 2337 "psrlw $2, %%mm4 \n\t" |
2339 "psrlw $2, %%mm2 \n\t" | 2338 "psrlw $2, %%mm2 \n\t" |
2340 #endif | 2339 #endif |
2341 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | 2340 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
2342 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | 2341 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" |
2343 | 2342 |
2344 "pmaddwd %%mm4, %%mm1 \n\t" | 2343 "pmaddwd %%mm4, %%mm1 \n\t" |
2345 "pmaddwd %%mm2, %%mm3 \n\t" | 2344 "pmaddwd %%mm2, %%mm3 \n\t" |
2346 "pmaddwd %%mm6, %%mm4 \n\t" | 2345 "pmaddwd %%mm6, %%mm4 \n\t" |
2347 "pmaddwd %%mm6, %%mm2 \n\t" | 2346 "pmaddwd %%mm6, %%mm2 \n\t" |
2348 #ifndef FAST_BGR2YV12 | 2347 #ifndef FAST_BGR2YV12 |
2349 "psrad $8, %%mm4 \n\t" | 2348 "psrad $8, %%mm4 \n\t" |
2350 "psrad $8, %%mm1 \n\t" | 2349 "psrad $8, %%mm1 \n\t" |
2351 "psrad $8, %%mm2 \n\t" | 2350 "psrad $8, %%mm2 \n\t" |
2352 "psrad $8, %%mm3 \n\t" | 2351 "psrad $8, %%mm3 \n\t" |
2353 #endif | 2352 #endif |
2354 "packssdw %%mm2, %%mm4 \n\t" | 2353 "packssdw %%mm2, %%mm4 \n\t" |
2355 "packssdw %%mm3, %%mm1 \n\t" | 2354 "packssdw %%mm3, %%mm1 \n\t" |
2356 "pmaddwd %%mm5, %%mm4 \n\t" | 2355 "pmaddwd %%mm5, %%mm4 \n\t" |
2357 "pmaddwd %%mm5, %%mm1 \n\t" | 2356 "pmaddwd %%mm5, %%mm1 \n\t" |
2358 "add $24, %%"REG_d" \n\t" | 2357 "add $24, %%"REG_d" \n\t" |
2359 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | 2358 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 |
2360 "psraw $7, %%mm4 \n\t" | 2359 "psraw $7, %%mm4 \n\t" |
2361 | 2360 |
2362 "movq %%mm0, %%mm1 \n\t" | 2361 "movq %%mm0, %%mm1 \n\t" |
2363 "punpckldq %%mm4, %%mm0 \n\t" | 2362 "punpckldq %%mm4, %%mm0 \n\t" |
2364 "punpckhdq %%mm4, %%mm1 \n\t" | 2363 "punpckhdq %%mm4, %%mm1 \n\t" |
2365 "packsswb %%mm1, %%mm0 \n\t" | 2364 "packsswb %%mm1, %%mm0 \n\t" |
2366 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | 2365 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
2367 "movd %%mm0, (%2, %%"REG_a") \n\t" | 2366 "movd %%mm0, (%2, %%"REG_a") \n\t" |
2368 "punpckhdq %%mm0, %%mm0 \n\t" | 2367 "punpckhdq %%mm0, %%mm0 \n\t" |
2369 "movd %%mm0, (%3, %%"REG_a") \n\t" | 2368 "movd %%mm0, (%3, %%"REG_a") \n\t" |
2370 "add $4, %%"REG_a" \n\t" | 2369 "add $4, %%"REG_a" \n\t" |
2371 " js 1b \n\t" | 2370 " js 1b \n\t" |
2372 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) | 2371 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) |
2373 : "%"REG_a, "%"REG_d | 2372 : "%"REG_a, "%"REG_d |
2374 ); | 2373 ); |
2375 | 2374 |
2376 udst += chromStride; | 2375 udst += chromStride; |
2377 vdst += chromStride; | 2376 vdst += chromStride; |
2378 src += srcStride*2; | 2377 src += srcStride*2; |
2379 } | 2378 } |
2380 | 2379 |
2381 asm volatile( EMMS" \n\t" | 2380 asm volatile( EMMS" \n\t" |
2382 SFENCE" \n\t" | 2381 SFENCE" \n\t" |
2383 :::"memory"); | 2382 :::"memory"); |
2384 #else | 2383 #else |
2385 y=0; | 2384 y=0; |
2386 #endif | 2385 #endif |
2387 for(; y<height; y+=2) | 2386 for (; y<height; y+=2) |
2388 { | 2387 { |
2389 long i; | 2388 long i; |
2390 for(i=0; i<chromWidth; i++) | 2389 for (i=0; i<chromWidth; i++) |
2391 { | 2390 { |
2392 unsigned int b= src[6*i+0]; | 2391 unsigned int b = src[6*i+0]; |
2393 unsigned int g= src[6*i+1]; | 2392 unsigned int g = src[6*i+1]; |
2394 unsigned int r= src[6*i+2]; | 2393 unsigned int r = src[6*i+2]; |
2395 | 2394 |
2396 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | 2395 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2397 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; | 2396 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; |
2398 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; | 2397 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; |
2399 | 2398 |
2400 udst[i] = U; | 2399 udst[i] = U; |
2401 vdst[i] = V; | 2400 vdst[i] = V; |
2402 ydst[2*i] = Y; | 2401 ydst[2*i] = Y; |
2403 | 2402 |
2404 b= src[6*i+3]; | 2403 b = src[6*i+3]; |
2405 g= src[6*i+4]; | 2404 g = src[6*i+4]; |
2406 r= src[6*i+5]; | 2405 r = src[6*i+5]; |
2407 | 2406 |
2408 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | 2407 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2409 ydst[2*i+1] = Y; | 2408 ydst[2*i+1] = Y; |
2410 } | 2409 } |
2411 ydst += lumStride; | 2410 ydst += lumStride; |
2412 src += srcStride; | 2411 src += srcStride; |
2413 | 2412 |
2414 for(i=0; i<chromWidth; i++) | 2413 for (i=0; i<chromWidth; i++) |
2415 { | 2414 { |
2416 unsigned int b= src[6*i+0]; | 2415 unsigned int b = src[6*i+0]; |
2417 unsigned int g= src[6*i+1]; | 2416 unsigned int g = src[6*i+1]; |
2418 unsigned int r= src[6*i+2]; | 2417 unsigned int r = src[6*i+2]; |
2419 | 2418 |
2420 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | 2419 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2421 | 2420 |
2422 ydst[2*i] = Y; | 2421 ydst[2*i] = Y; |
2423 | 2422 |
2424 b= src[6*i+3]; | 2423 b = src[6*i+3]; |
2425 g= src[6*i+4]; | 2424 g = src[6*i+4]; |
2426 r= src[6*i+5]; | 2425 r = src[6*i+5]; |
2427 | 2426 |
2428 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | 2427 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2429 ydst[2*i+1] = Y; | 2428 ydst[2*i+1] = Y; |
2430 } | 2429 } |
2431 udst += chromStride; | 2430 udst += chromStride; |
2432 vdst += chromStride; | 2431 vdst += chromStride; |
2433 ydst += lumStride; | 2432 ydst += lumStride; |
2434 src += srcStride; | 2433 src += srcStride; |
2435 } | 2434 } |
2436 } | 2435 } |
2437 | 2436 |
2438 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | 2437 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, |
2439 long width, long height, long src1Stride, | 2438 long width, long height, long src1Stride, |
2440 long src2Stride, long dstStride){ | 2439 long src2Stride, long dstStride){ |
2441 long h; | 2440 long h; |
2442 | 2441 |
2443 for(h=0; h < height; h++) | 2442 for (h=0; h < height; h++) |
2444 { | 2443 { |
2445 long w; | 2444 long w; |
2446 | 2445 |
2447 #ifdef HAVE_MMX | 2446 #ifdef HAVE_MMX |
2448 #ifdef HAVE_SSE2 | 2447 #ifdef HAVE_SSE2 |
2449 asm( | 2448 asm( |
2450 "xor %%"REG_a", %%"REG_a" \n\t" | 2449 "xor %%"REG_a", %%"REG_a" \n\t" |
2451 "1: \n\t" | 2450 "1: \n\t" |
2452 PREFETCH" 64(%1, %%"REG_a") \n\t" | 2451 PREFETCH" 64(%1, %%"REG_a") \n\t" |
2453 PREFETCH" 64(%2, %%"REG_a") \n\t" | 2452 PREFETCH" 64(%2, %%"REG_a") \n\t" |
2454 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" | 2453 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" |
2455 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" | 2454 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" |
2456 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" | 2455 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" |
2457 "punpcklbw %%xmm2, %%xmm0 \n\t" | 2456 "punpcklbw %%xmm2, %%xmm0 \n\t" |
2458 "punpckhbw %%xmm2, %%xmm1 \n\t" | 2457 "punpckhbw %%xmm2, %%xmm1 \n\t" |
2459 "movntdq %%xmm0, (%0, %%"REG_a", 2)\n\t" | 2458 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" |
2460 "movntdq %%xmm1, 16(%0, %%"REG_a", 2)\n\t" | 2459 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" |
2461 "add $16, %%"REG_a" \n\t" | 2460 "add $16, %%"REG_a" \n\t" |
2462 "cmp %3, %%"REG_a" \n\t" | 2461 "cmp %3, %%"REG_a" \n\t" |
2463 " jb 1b \n\t" | 2462 " jb 1b \n\t" |
2464 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | 2463 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) |
2465 : "memory", "%"REG_a"" | 2464 : "memory", "%"REG_a"" |
2466 ); | 2465 ); |
2467 #else | 2466 #else |
2468 asm( | 2467 asm( |
2469 "xor %%"REG_a", %%"REG_a" \n\t" | 2468 "xor %%"REG_a", %%"REG_a" \n\t" |
2470 "1: \n\t" | 2469 "1: \n\t" |
2471 PREFETCH" 64(%1, %%"REG_a") \n\t" | 2470 PREFETCH" 64(%1, %%"REG_a") \n\t" |
2472 PREFETCH" 64(%2, %%"REG_a") \n\t" | 2471 PREFETCH" 64(%2, %%"REG_a") \n\t" |
2473 "movq (%1, %%"REG_a"), %%mm0 \n\t" | 2472 "movq (%1, %%"REG_a"), %%mm0 \n\t" |
2474 "movq 8(%1, %%"REG_a"), %%mm2 \n\t" | 2473 "movq 8(%1, %%"REG_a"), %%mm2 \n\t" |
2475 "movq %%mm0, %%mm1 \n\t" | 2474 "movq %%mm0, %%mm1 \n\t" |
2476 "movq %%mm2, %%mm3 \n\t" | 2475 "movq %%mm2, %%mm3 \n\t" |
2477 "movq (%2, %%"REG_a"), %%mm4 \n\t" | 2476 "movq (%2, %%"REG_a"), %%mm4 \n\t" |
2478 "movq 8(%2, %%"REG_a"), %%mm5 \n\t" | 2477 "movq 8(%2, %%"REG_a"), %%mm5 \n\t" |
2479 "punpcklbw %%mm4, %%mm0 \n\t" | 2478 "punpcklbw %%mm4, %%mm0 \n\t" |
2480 "punpckhbw %%mm4, %%mm1 \n\t" | 2479 "punpckhbw %%mm4, %%mm1 \n\t" |
2481 "punpcklbw %%mm5, %%mm2 \n\t" | 2480 "punpcklbw %%mm5, %%mm2 \n\t" |
2482 "punpckhbw %%mm5, %%mm3 \n\t" | 2481 "punpckhbw %%mm5, %%mm3 \n\t" |
2483 MOVNTQ" %%mm0, (%0, %%"REG_a", 2)\n\t" | 2482 MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" |
2484 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2)\n\t" | 2483 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" |
2485 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2)\n\t" | 2484 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" |
2486 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2)\n\t" | 2485 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" |
2487 "add $16, %%"REG_a" \n\t" | 2486 "add $16, %%"REG_a" \n\t" |
2488 "cmp %3, %%"REG_a" \n\t" | 2487 "cmp %3, %%"REG_a" \n\t" |
2489 " jb 1b \n\t" | 2488 " jb 1b \n\t" |
2490 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | 2489 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) |
2491 : "memory", "%"REG_a | 2490 : "memory", "%"REG_a |
2492 ); | 2491 ); |
2493 #endif | 2492 #endif |
2494 for(w= (width&(~15)); w < width; w++) | 2493 for (w= (width&(~15)); w < width; w++) |
2495 { | 2494 { |
2496 dest[2*w+0] = src1[w]; | 2495 dest[2*w+0] = src1[w]; |
2497 dest[2*w+1] = src2[w]; | 2496 dest[2*w+1] = src2[w]; |
2498 } | 2497 } |
2499 #else | 2498 #else |
2500 for(w=0; w < width; w++) | 2499 for (w=0; w < width; w++) |
2501 { | 2500 { |
2502 dest[2*w+0] = src1[w]; | 2501 dest[2*w+0] = src1[w]; |
2503 dest[2*w+1] = src2[w]; | 2502 dest[2*w+1] = src2[w]; |
2504 } | 2503 } |
2505 #endif | 2504 #endif |
2506 dest += dstStride; | 2505 dest += dstStride; |
2507 src1 += src1Stride; | 2506 src1 += src1Stride; |
2508 src2 += src2Stride; | 2507 src2 += src2Stride; |
2509 } | 2508 } |
2510 #ifdef HAVE_MMX | 2509 #ifdef HAVE_MMX |
2511 asm( | 2510 asm( |
2512 EMMS" \n\t" | 2511 EMMS" \n\t" |
2513 SFENCE" \n\t" | 2512 SFENCE" \n\t" |
2514 ::: "memory" | 2513 ::: "memory" |
2515 ); | 2514 ); |
2516 #endif | 2515 #endif |
2517 } | 2516 } |
2518 | 2517 |
2519 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | 2518 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
2520 uint8_t *dst1, uint8_t *dst2, | 2519 uint8_t *dst1, uint8_t *dst2, |
2521 long width, long height, | 2520 long width, long height, |
2522 long srcStride1, long srcStride2, | 2521 long srcStride1, long srcStride2, |
2523 long dstStride1, long dstStride2) | 2522 long dstStride1, long dstStride2) |
2524 { | 2523 { |
2525 long y,x,w,h; | 2524 long y,x,w,h; |
2526 w=width/2; h=height/2; | 2525 w=width/2; h=height/2; |
2527 #ifdef HAVE_MMX | 2526 #ifdef HAVE_MMX |
2528 asm volatile( | 2527 asm volatile( |
2529 PREFETCH" %0\n\t" | 2528 PREFETCH" %0 \n\t" |
2530 PREFETCH" %1\n\t" | 2529 PREFETCH" %1 \n\t" |
2531 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); | 2530 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); |
2532 #endif | 2531 #endif |
2533 for(y=0;y<h;y++){ | 2532 for (y=0;y<h;y++){ |
2534 const uint8_t* s1=src1+srcStride1*(y>>1); | 2533 const uint8_t* s1=src1+srcStride1*(y>>1); |
2535 uint8_t* d=dst1+dstStride1*y; | 2534 uint8_t* d=dst1+dstStride1*y; |
2536 x=0; | 2535 x=0; |
2537 #ifdef HAVE_MMX | 2536 #ifdef HAVE_MMX |
2538 for(;x<w-31;x+=32) | 2537 for (;x<w-31;x+=32) |
2539 { | 2538 { |
2540 asm volatile( | 2539 asm volatile( |
2541 PREFETCH" 32%1\n\t" | 2540 PREFETCH" 32%1 \n\t" |
2542 "movq %1, %%mm0\n\t" | 2541 "movq %1, %%mm0 \n\t" |
2543 "movq 8%1, %%mm2\n\t" | 2542 "movq 8%1, %%mm2 \n\t" |
2544 "movq 16%1, %%mm4\n\t" | 2543 "movq 16%1, %%mm4 \n\t" |
2545 "movq 24%1, %%mm6\n\t" | 2544 "movq 24%1, %%mm6 \n\t" |
2546 "movq %%mm0, %%mm1\n\t" | 2545 "movq %%mm0, %%mm1 \n\t" |
2547 "movq %%mm2, %%mm3\n\t" | 2546 "movq %%mm2, %%mm3 \n\t" |
2548 "movq %%mm4, %%mm5\n\t" | 2547 "movq %%mm4, %%mm5 \n\t" |
2549 "movq %%mm6, %%mm7\n\t" | 2548 "movq %%mm6, %%mm7 \n\t" |
2550 "punpcklbw %%mm0, %%mm0\n\t" | 2549 "punpcklbw %%mm0, %%mm0 \n\t" |
2551 "punpckhbw %%mm1, %%mm1\n\t" | 2550 "punpckhbw %%mm1, %%mm1 \n\t" |
2552 "punpcklbw %%mm2, %%mm2\n\t" | 2551 "punpcklbw %%mm2, %%mm2 \n\t" |
2553 "punpckhbw %%mm3, %%mm3\n\t" | 2552 "punpckhbw %%mm3, %%mm3 \n\t" |
2554 "punpcklbw %%mm4, %%mm4\n\t" | 2553 "punpcklbw %%mm4, %%mm4 \n\t" |
2555 "punpckhbw %%mm5, %%mm5\n\t" | 2554 "punpckhbw %%mm5, %%mm5 \n\t" |
2556 "punpcklbw %%mm6, %%mm6\n\t" | 2555 "punpcklbw %%mm6, %%mm6 \n\t" |
2557 "punpckhbw %%mm7, %%mm7\n\t" | 2556 "punpckhbw %%mm7, %%mm7 \n\t" |
2558 MOVNTQ" %%mm0, %0\n\t" | 2557 MOVNTQ" %%mm0, %0 \n\t" |
2559 MOVNTQ" %%mm1, 8%0\n\t" | 2558 MOVNTQ" %%mm1, 8%0 \n\t" |
2560 MOVNTQ" %%mm2, 16%0\n\t" | 2559 MOVNTQ" %%mm2, 16%0 \n\t" |
2561 MOVNTQ" %%mm3, 24%0\n\t" | 2560 MOVNTQ" %%mm3, 24%0 \n\t" |
2562 MOVNTQ" %%mm4, 32%0\n\t" | 2561 MOVNTQ" %%mm4, 32%0 \n\t" |
2563 MOVNTQ" %%mm5, 40%0\n\t" | 2562 MOVNTQ" %%mm5, 40%0 \n\t" |
2564 MOVNTQ" %%mm6, 48%0\n\t" | 2563 MOVNTQ" %%mm6, 48%0 \n\t" |
2565 MOVNTQ" %%mm7, 56%0" | 2564 MOVNTQ" %%mm7, 56%0" |
2566 :"=m"(d[2*x]) | 2565 :"=m"(d[2*x]) |
2567 :"m"(s1[x]) | 2566 :"m"(s1[x]) |
2568 :"memory"); | 2567 :"memory"); |
2569 } | 2568 } |
2570 #endif | 2569 #endif |
2571 for(;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; | 2570 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; |
2572 } | 2571 } |
2573 for(y=0;y<h;y++){ | 2572 for (y=0;y<h;y++){ |
2574 const uint8_t* s2=src2+srcStride2*(y>>1); | 2573 const uint8_t* s2=src2+srcStride2*(y>>1); |
2575 uint8_t* d=dst2+dstStride2*y; | 2574 uint8_t* d=dst2+dstStride2*y; |
2576 x=0; | 2575 x=0; |
2577 #ifdef HAVE_MMX | 2576 #ifdef HAVE_MMX |
2578 for(;x<w-31;x+=32) | 2577 for (;x<w-31;x+=32) |
2579 { | 2578 { |
2580 asm volatile( | 2579 asm volatile( |
2581 PREFETCH" 32%1\n\t" | 2580 PREFETCH" 32%1 \n\t" |
2582 "movq %1, %%mm0\n\t" | 2581 "movq %1, %%mm0 \n\t" |
2583 "movq 8%1, %%mm2\n\t" | 2582 "movq 8%1, %%mm2 \n\t" |
2584 "movq 16%1, %%mm4\n\t" | 2583 "movq 16%1, %%mm4 \n\t" |
2585 "movq 24%1, %%mm6\n\t" | 2584 "movq 24%1, %%mm6 \n\t" |
2586 "movq %%mm0, %%mm1\n\t" | 2585 "movq %%mm0, %%mm1 \n\t" |
2587 "movq %%mm2, %%mm3\n\t" | 2586 "movq %%mm2, %%mm3 \n\t" |
2588 "movq %%mm4, %%mm5\n\t" | 2587 "movq %%mm4, %%mm5 \n\t" |
2589 "movq %%mm6, %%mm7\n\t" | 2588 "movq %%mm6, %%mm7 \n\t" |
2590 "punpcklbw %%mm0, %%mm0\n\t" | 2589 "punpcklbw %%mm0, %%mm0 \n\t" |
2591 "punpckhbw %%mm1, %%mm1\n\t" | 2590 "punpckhbw %%mm1, %%mm1 \n\t" |
2592 "punpcklbw %%mm2, %%mm2\n\t" | 2591 "punpcklbw %%mm2, %%mm2 \n\t" |
2593 "punpckhbw %%mm3, %%mm3\n\t" | 2592 "punpckhbw %%mm3, %%mm3 \n\t" |
2594 "punpcklbw %%mm4, %%mm4\n\t" | 2593 "punpcklbw %%mm4, %%mm4 \n\t" |
2595 "punpckhbw %%mm5, %%mm5\n\t" | 2594 "punpckhbw %%mm5, %%mm5 \n\t" |
2596 "punpcklbw %%mm6, %%mm6\n\t" | 2595 "punpcklbw %%mm6, %%mm6 \n\t" |
2597 "punpckhbw %%mm7, %%mm7\n\t" | 2596 "punpckhbw %%mm7, %%mm7 \n\t" |
2598 MOVNTQ" %%mm0, %0\n\t" | 2597 MOVNTQ" %%mm0, %0 \n\t" |
2599 MOVNTQ" %%mm1, 8%0\n\t" | 2598 MOVNTQ" %%mm1, 8%0 \n\t" |
2600 MOVNTQ" %%mm2, 16%0\n\t" | 2599 MOVNTQ" %%mm2, 16%0 \n\t" |
2601 MOVNTQ" %%mm3, 24%0\n\t" | 2600 MOVNTQ" %%mm3, 24%0 \n\t" |
2602 MOVNTQ" %%mm4, 32%0\n\t" | 2601 MOVNTQ" %%mm4, 32%0 \n\t" |
2603 MOVNTQ" %%mm5, 40%0\n\t" | 2602 MOVNTQ" %%mm5, 40%0 \n\t" |
2604 MOVNTQ" %%mm6, 48%0\n\t" | 2603 MOVNTQ" %%mm6, 48%0 \n\t" |
2605 MOVNTQ" %%mm7, 56%0" | 2604 MOVNTQ" %%mm7, 56%0" |
2606 :"=m"(d[2*x]) | 2605 :"=m"(d[2*x]) |
2607 :"m"(s2[x]) | 2606 :"m"(s2[x]) |
2608 :"memory"); | 2607 :"memory"); |
2609 } | 2608 } |
2610 #endif | 2609 #endif |
2611 for(;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; | 2610 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |
2612 } | 2611 } |
2613 #ifdef HAVE_MMX | 2612 #ifdef HAVE_MMX |
2614 asm( | 2613 asm( |
2615 EMMS" \n\t" | 2614 EMMS" \n\t" |
2616 SFENCE" \n\t" | 2615 SFENCE" \n\t" |
2617 ::: "memory" | 2616 ::: "memory" |
2618 ); | 2617 ); |
2619 #endif | 2618 #endif |
2620 } | 2619 } |
2621 | 2620 |
2622 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, | 2621 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, |
2623 uint8_t *dst, | 2622 uint8_t *dst, |
2624 long width, long height, | 2623 long width, long height, |
2625 long srcStride1, long srcStride2, | 2624 long srcStride1, long srcStride2, |
2626 long srcStride3, long dstStride) | 2625 long srcStride3, long dstStride) |
2627 { | 2626 { |
2628 long y,x,w,h; | 2627 long y,x,w,h; |
2629 w=width/2; h=height; | 2628 w=width/2; h=height; |
2630 for(y=0;y<h;y++){ | 2629 for (y=0;y<h;y++){ |
2631 const uint8_t* yp=src1+srcStride1*y; | 2630 const uint8_t* yp=src1+srcStride1*y; |
2632 const uint8_t* up=src2+srcStride2*(y>>2); | 2631 const uint8_t* up=src2+srcStride2*(y>>2); |
2633 const uint8_t* vp=src3+srcStride3*(y>>2); | 2632 const uint8_t* vp=src3+srcStride3*(y>>2); |
2634 uint8_t* d=dst+dstStride*y; | 2633 uint8_t* d=dst+dstStride*y; |
2635 x=0; | 2634 x=0; |
2636 #ifdef HAVE_MMX | 2635 #ifdef HAVE_MMX |
2637 for(;x<w-7;x+=8) | 2636 for (;x<w-7;x+=8) |
2638 { | 2637 { |
2639 asm volatile( | 2638 asm volatile( |
2640 PREFETCH" 32(%1, %0)\n\t" | 2639 PREFETCH" 32(%1, %0) \n\t" |
2641 PREFETCH" 32(%2, %0)\n\t" | 2640 PREFETCH" 32(%2, %0) \n\t" |
2642 PREFETCH" 32(%3, %0)\n\t" | 2641 PREFETCH" 32(%3, %0) \n\t" |
2643 "movq (%1, %0, 4), %%mm0\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | 2642 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ |
2644 "movq (%2, %0), %%mm1\n\t" /* U0U1U2U3U4U5U6U7 */ | 2643 "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */ |
2645 "movq (%3, %0), %%mm2\n\t" /* V0V1V2V3V4V5V6V7 */ | 2644 "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */ |
2646 "movq %%mm0, %%mm3\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | 2645 "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ |
2647 "movq %%mm1, %%mm4\n\t" /* U0U1U2U3U4U5U6U7 */ | 2646 "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */ |
2648 "movq %%mm2, %%mm5\n\t" /* V0V1V2V3V4V5V6V7 */ | 2647 "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */ |
2649 "punpcklbw %%mm1, %%mm1\n\t" /* U0U0 U1U1 U2U2 U3U3 */ | 2648 "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */ |
2650 "punpcklbw %%mm2, %%mm2\n\t" /* V0V0 V1V1 V2V2 V3V3 */ | 2649 "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */ |
2651 "punpckhbw %%mm4, %%mm4\n\t" /* U4U4 U5U5 U6U6 U7U7 */ | 2650 "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */ |
2652 "punpckhbw %%mm5, %%mm5\n\t" /* V4V4 V5V5 V6V6 V7V7 */ | 2651 "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */ |
2653 | 2652 |
2654 "movq %%mm1, %%mm6\n\t" | 2653 "movq %%mm1, %%mm6 \n\t" |
2655 "punpcklbw %%mm2, %%mm1\n\t" /* U0V0 U0V0 U1V1 U1V1*/ | 2654 "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/ |
2656 "punpcklbw %%mm1, %%mm0\n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ | 2655 "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ |
2657 "punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ | 2656 "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ |
2658 MOVNTQ" %%mm0, (%4, %0, 8)\n\t" | 2657 MOVNTQ" %%mm0, (%4, %0, 8) \n\t" |
2659 MOVNTQ" %%mm3, 8(%4, %0, 8)\n\t" | 2658 MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" |
2660 | 2659 |
2661 "punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/ | 2660 "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/ |
2662 "movq 8(%1, %0, 4), %%mm0\n\t" | 2661 "movq 8(%1, %0, 4), %%mm0 \n\t" |
2663 "movq %%mm0, %%mm3\n\t" | 2662 "movq %%mm0, %%mm3 \n\t" |
2664 "punpcklbw %%mm6, %%mm0\n\t" /* Y U2 Y V2 Y U2 Y V2*/ | 2663 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/ |
2665 "punpckhbw %%mm6, %%mm3\n\t" /* Y U3 Y V3 Y U3 Y V3*/ | 2664 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/ |
2666 MOVNTQ" %%mm0, 16(%4, %0, 8)\n\t" | 2665 MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" |
2667 MOVNTQ" %%mm3, 24(%4, %0, 8)\n\t" | 2666 MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" |
2668 | 2667 |
2669 "movq %%mm4, %%mm6\n\t" | 2668 "movq %%mm4, %%mm6 \n\t" |
2670 "movq 16(%1, %0, 4), %%mm0\n\t" | 2669 "movq 16(%1, %0, 4), %%mm0 \n\t" |
2671 "movq %%mm0, %%mm3\n\t" | 2670 "movq %%mm0, %%mm3 \n\t" |
2672 "punpcklbw %%mm5, %%mm4\n\t" | 2671 "punpcklbw %%mm5, %%mm4 \n\t" |
2673 "punpcklbw %%mm4, %%mm0\n\t" /* Y U4 Y V4 Y U4 Y V4*/ | 2672 "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/ |
2674 "punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/ | 2673 "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/ |
2675 MOVNTQ" %%mm0, 32(%4, %0, 8)\n\t" | 2674 MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" |
2676 MOVNTQ" %%mm3, 40(%4, %0, 8)\n\t" | 2675 MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" |
2677 | 2676 |
2678 "punpckhbw %%mm5, %%mm6\n\t" | 2677 "punpckhbw %%mm5, %%mm6 \n\t" |
2679 "movq 24(%1, %0, 4), %%mm0\n\t" | 2678 "movq 24(%1, %0, 4), %%mm0 \n\t" |
2680 "movq %%mm0, %%mm3\n\t" | 2679 "movq %%mm0, %%mm3 \n\t" |
2681 "punpcklbw %%mm6, %%mm0\n\t" /* Y U6 Y V6 Y U6 Y V6*/ | 2680 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/ |
2682 "punpckhbw %%mm6, %%mm3\n\t" /* Y U7 Y V7 Y U7 Y V7*/ | 2681 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/ |
2683 MOVNTQ" %%mm0, 48(%4, %0, 8)\n\t" | 2682 MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" |
2684 MOVNTQ" %%mm3, 56(%4, %0, 8)\n\t" | 2683 MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" |
2685 | 2684 |
2686 : "+r" (x) | 2685 : "+r" (x) |
2687 : "r"(yp), "r" (up), "r"(vp), "r"(d) | 2686 : "r"(yp), "r" (up), "r"(vp), "r"(d) |
2688 :"memory"); | 2687 :"memory"); |
2689 } | 2688 } |
2690 #endif | 2689 #endif |
2691 for(; x<w; x++) | 2690 for (; x<w; x++) |
2692 { | 2691 { |
2693 const long x2= x<<2; | 2692 const long x2 = x<<2; |
2694 d[8*x+0]=yp[x2]; | 2693 d[8*x+0] = yp[x2]; |
2695 d[8*x+1]=up[x]; | 2694 d[8*x+1] = up[x]; |
2696 d[8*x+2]=yp[x2+1]; | 2695 d[8*x+2] = yp[x2+1]; |
2697 d[8*x+3]=vp[x]; | 2696 d[8*x+3] = vp[x]; |
2698 d[8*x+4]=yp[x2+2]; | 2697 d[8*x+4] = yp[x2+2]; |
2699 d[8*x+5]=up[x]; | 2698 d[8*x+5] = up[x]; |
2700 d[8*x+6]=yp[x2+3]; | 2699 d[8*x+6] = yp[x2+3]; |
2701 d[8*x+7]=vp[x]; | 2700 d[8*x+7] = vp[x]; |
2702 } | 2701 } |
2703 } | 2702 } |
2704 #ifdef HAVE_MMX | 2703 #ifdef HAVE_MMX |
2705 asm( | 2704 asm( |
2706 EMMS" \n\t" | 2705 EMMS" \n\t" |
2707 SFENCE" \n\t" | 2706 SFENCE" \n\t" |
2708 ::: "memory" | 2707 ::: "memory" |
2709 ); | 2708 ); |
2710 #endif | 2709 #endif |
2711 } | 2710 } |
2712 | 2711 |
/*
 * rgb2rgb_init: for every converter listed below, stores the RENAME()-wrapped
 * symbol of the same name into the identically named identifier that is
 * declared outside this function. Takes no arguments and returns nothing.
 *
 * NOTE(review): the left-hand sides are presumably the global function
 * pointers that callers dispatch through -- confirm against their declarations.
 * NOTE(review): each row of this listing carries the old and the new revision
 * side by side; within this function the two differ only in the spacing
 * around '='.
 */
2713 static inline void RENAME(rgb2rgb_init)(void){ | 2712 static inline void RENAME(rgb2rgb_init)(void){ |
2714 rgb15to16= RENAME(rgb15to16); | 2713 rgb15to16 = RENAME(rgb15to16); |
2715 rgb15to24= RENAME(rgb15to24); | 2714 rgb15to24 = RENAME(rgb15to24); |
2716 rgb15to32= RENAME(rgb15to32); | 2715 rgb15to32 = RENAME(rgb15to32); |
2717 rgb16to24= RENAME(rgb16to24); | 2716 rgb16to24 = RENAME(rgb16to24); |
2718 rgb16to32= RENAME(rgb16to32); | 2717 rgb16to32 = RENAME(rgb16to32); |
2719 rgb16to15= RENAME(rgb16to15); | 2718 rgb16to15 = RENAME(rgb16to15); |
2720 rgb24to16= RENAME(rgb24to16); | 2719 rgb24to16 = RENAME(rgb24to16); |
2721 rgb24to15= RENAME(rgb24to15); | 2720 rgb24to15 = RENAME(rgb24to15); |
2722 rgb24to32= RENAME(rgb24to32); | 2721 rgb24to32 = RENAME(rgb24to32); |
2723 rgb32to16= RENAME(rgb32to16); | 2722 rgb32to16 = RENAME(rgb32to16); |
2724 rgb32to15= RENAME(rgb32to15); | 2723 rgb32to15 = RENAME(rgb32to15); |
2725 rgb32to24= RENAME(rgb32to24); | 2724 rgb32to24 = RENAME(rgb32to24); |
2726 rgb24tobgr15= RENAME(rgb24tobgr15); | 2725 rgb24tobgr15 = RENAME(rgb24tobgr15); |
2727 rgb24tobgr16= RENAME(rgb24tobgr16); | 2726 rgb24tobgr16 = RENAME(rgb24tobgr16); |
2728 rgb24tobgr24= RENAME(rgb24tobgr24); | 2727 rgb24tobgr24 = RENAME(rgb24tobgr24); |
2729 rgb32tobgr32= RENAME(rgb32tobgr32); | 2728 rgb32tobgr32 = RENAME(rgb32tobgr32); |
2730 rgb32tobgr16= RENAME(rgb32tobgr16); | 2729 rgb32tobgr16 = RENAME(rgb32tobgr16); |
2731 rgb32tobgr15= RENAME(rgb32tobgr15); | 2730 rgb32tobgr15 = RENAME(rgb32tobgr15); |
2732 yv12toyuy2= RENAME(yv12toyuy2); | 2731 yv12toyuy2 = RENAME(yv12toyuy2); |
2733 yv12touyvy= RENAME(yv12touyvy); | 2732 yv12touyvy = RENAME(yv12touyvy); |
2734 yuv422ptoyuy2= RENAME(yuv422ptoyuy2); | 2733 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); |
2735 yuy2toyv12= RENAME(yuy2toyv12); | 2734 yuy2toyv12 = RENAME(yuy2toyv12); |
/* uyvytoyv12 and yvu9toyv12 are not assigned by this function: their lines below are commented out. */
2736 // uyvytoyv12= RENAME(uyvytoyv12); | 2735 // uyvytoyv12 = RENAME(uyvytoyv12); |
2737 // yvu9toyv12= RENAME(yvu9toyv12); | 2736 // yvu9toyv12 = RENAME(yvu9toyv12); |
2738 planar2x= RENAME(planar2x); | 2737 planar2x = RENAME(planar2x); |
2739 rgb24toyv12= RENAME(rgb24toyv12); | 2738 rgb24toyv12 = RENAME(rgb24toyv12); |
2740 interleaveBytes= RENAME(interleaveBytes); | 2739 interleaveBytes = RENAME(interleaveBytes); |
2741 vu9_to_vu12= RENAME(vu9_to_vu12); | 2740 vu9_to_vu12 = RENAME(vu9_to_vu12); |
2742 yvu9_to_yuy2= RENAME(yvu9_to_yuy2); | 2741 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); |
2743 } | 2742 } |