Mercurial > mplayer.hg
comparison postproc/rgb2rgb_template.c @ 6492:e7635c03910f
sync with mplayer xp
- partial yvu9 support (copy only)
- rgb 15/16 -> 24/32 converters
- int->unsigned changes
author | arpi |
---|---|
date | Sat, 22 Jun 2002 08:49:45 +0000 |
parents | c5cf988c6d6f |
children | f98313dcd428 |
comparison
equal
deleted
inserted
replaced
6491:920796b6c7b1 | 6492:e7635c03910f |
---|---|
5 * Software YUV to YUV convertor | 5 * Software YUV to YUV convertor |
6 * Software YUV to RGB convertor | 6 * Software YUV to RGB convertor |
7 * Written by Nick Kurshev. | 7 * Written by Nick Kurshev. |
8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) | 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) |
9 */ | 9 */ |
10 | |
11 #include <stddef.h> | |
12 #include <inttypes.h> /* for __WORDSIZE */ | |
13 | |
14 #ifndef __WORDSIZE | |
15 #warning You have misconfigured system and probably will lose performance! | |
16 #endif | |
10 | 17 |
11 #undef PREFETCH | 18 #undef PREFETCH |
12 #undef MOVNTQ | 19 #undef MOVNTQ |
13 #undef EMMS | 20 #undef EMMS |
14 #undef SFENCE | 21 #undef SFENCE |
54 { | 61 { |
55 uint8_t *dest = dst; | 62 uint8_t *dest = dst; |
56 const uint8_t *s = src; | 63 const uint8_t *s = src; |
57 const uint8_t *end; | 64 const uint8_t *end; |
58 #ifdef HAVE_MMX | 65 #ifdef HAVE_MMX |
59 const uint8_t *mm_end; | 66 uint8_t *mm_end; |
60 #endif | 67 #endif |
61 end = s + src_size; | 68 end = s + src_size; |
62 #ifdef HAVE_MMX | 69 #ifdef HAVE_MMX |
63 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 70 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
64 mm_end = end - 23; | |
65 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); | 71 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); |
72 mm_end = (uint8_t*)((((unsigned long)end)/24)*24); | |
66 while(s < mm_end) | 73 while(s < mm_end) |
67 { | 74 { |
68 __asm __volatile( | 75 __asm __volatile( |
69 PREFETCH" 32%1\n\t" | 76 PREFETCH" 32%1\n\t" |
70 "movd %1, %%mm0\n\t" | 77 "movd %1, %%mm0\n\t" |
105 { | 112 { |
106 uint8_t *dest = dst; | 113 uint8_t *dest = dst; |
107 const uint8_t *s = src; | 114 const uint8_t *s = src; |
108 const uint8_t *end; | 115 const uint8_t *end; |
109 #ifdef HAVE_MMX | 116 #ifdef HAVE_MMX |
110 const uint8_t *mm_end; | 117 uint8_t *mm_end; |
111 #endif | 118 #endif |
112 end = s + src_size; | 119 end = s + src_size; |
113 #ifdef HAVE_MMX | 120 #ifdef HAVE_MMX |
114 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 121 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
115 mm_end = end - 31; | 122 mm_end = (uint8_t*)((((unsigned long)end)/32)*32); |
116 while(s < mm_end) | 123 while(s < mm_end) |
117 { | 124 { |
118 __asm __volatile( | 125 __asm __volatile( |
119 PREFETCH" 32%1\n\t" | 126 PREFETCH" 32%1\n\t" |
120 "movq %1, %%mm0\n\t" | 127 "movq %1, %%mm0\n\t" |
184 MMX2, 3DNOW optimization by Nick Kurshev | 191 MMX2, 3DNOW optimization by Nick Kurshev |
185 32bit c version, and and&add trick by Michael Niedermayer | 192 32bit c version, and and&add trick by Michael Niedermayer |
186 */ | 193 */ |
187 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) | 194 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) |
188 { | 195 { |
189 #ifdef HAVE_MMX | 196 register const uint8_t* s=src; |
190 register int offs=15-src_size; | 197 register uint8_t* d=dst; |
191 register const char* s=src-offs; | 198 register const uint8_t *end; |
192 register char* d=dst-offs; | 199 uint8_t *mm_end; |
193 __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); | 200 end = s + src_size; |
194 __asm __volatile( | 201 #ifdef HAVE_MMX |
195 "movq %0, %%mm4\n\t" | 202 __asm __volatile(PREFETCH" %0"::"m"(*s)); |
196 ::"m"(mask15s)); | 203 __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); |
197 while(offs<0) | 204 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); |
205 while(s<mm_end) | |
198 { | 206 { |
199 __asm __volatile( | 207 __asm __volatile( |
200 PREFETCH" 32%1\n\t" | 208 PREFETCH" 32%1\n\t" |
201 "movq %1, %%mm0\n\t" | 209 "movq %1, %%mm0\n\t" |
202 "movq 8%1, %%mm2\n\t" | 210 "movq 8%1, %%mm2\n\t" |
206 "pand %%mm4, %%mm2\n\t" | 214 "pand %%mm4, %%mm2\n\t" |
207 "paddw %%mm1, %%mm0\n\t" | 215 "paddw %%mm1, %%mm0\n\t" |
208 "paddw %%mm3, %%mm2\n\t" | 216 "paddw %%mm3, %%mm2\n\t" |
209 MOVNTQ" %%mm0, %0\n\t" | 217 MOVNTQ" %%mm0, %0\n\t" |
210 MOVNTQ" %%mm2, 8%0" | 218 MOVNTQ" %%mm2, 8%0" |
211 :"=m"(*(d+offs)) | 219 :"=m"(*d) |
212 :"m"(*(s+offs)) | 220 :"m"(*s) |
213 ); | 221 ); |
214 offs+=16; | 222 d+=16; |
223 s+=16; | |
215 } | 224 } |
216 __asm __volatile(SFENCE:::"memory"); | 225 __asm __volatile(SFENCE:::"memory"); |
217 __asm __volatile(EMMS:::"memory"); | 226 __asm __volatile(EMMS:::"memory"); |
218 #else | 227 #endif |
219 #if 0 | 228 mm_end = (uint8_t*)((((unsigned long)end)/4)*4); |
220 const uint16_t *s1=( uint16_t * )src; | 229 while(s < mm_end) |
221 uint16_t *d1=( uint16_t * )dst; | 230 { |
222 uint16_t *e=((uint8_t *)s1)+src_size; | 231 register unsigned x= *((uint32_t *)s); |
223 while( s1<e ){ | 232 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); |
224 register int x=*( s1++ ); | 233 d+=4; |
225 /* rrrrrggggggbbbbb | 234 s+=4; |
226 0rrrrrgggggbbbbb | 235 } |
227 0111 1111 1110 0000=0x7FE0 | 236 if(s < end) |
228 00000000000001 1111=0x001F */ | 237 { |
229 *( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 ); | 238 register unsigned short x= *((uint16_t *)s); |
230 } | 239 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); |
231 #else | 240 } |
232 const unsigned *s1=( unsigned * )src; | |
233 unsigned *d1=( unsigned * )dst; | |
234 int i; | |
235 int size= src_size>>2; | |
236 for(i=0; i<size; i++) | |
237 { | |
238 register int x= s1[i]; | |
239 // d1[i] = x + (x&0x7FE07FE0); //faster but needs msbit =0 which might not always be true | |
240 d1[i] = (x&0x7FFF7FFF) + (x&0x7FE07FE0); | |
241 | |
242 } | |
243 #endif | |
244 #endif | |
245 } | 241 } |
246 | 242 |
247 static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | 243 static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
248 { | 244 { |
249 unsigned j,i,num_pixels=src_size/3; | 245 unsigned j,i,num_pixels=src_size/3; |
255 } | 251 } |
256 } | 252 } |
257 | 253 |
258 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) | 254 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
259 { | 255 { |
260 #ifdef HAVE_MMX | |
261 const uint8_t *s = src; | 256 const uint8_t *s = src; |
262 const uint8_t *end,*mm_end; | 257 const uint8_t *end; |
258 #ifdef HAVE_MMX | |
259 const uint8_t *mm_end; | |
260 #endif | |
263 uint16_t *d = (uint16_t *)dst; | 261 uint16_t *d = (uint16_t *)dst; |
264 end = s + src_size; | 262 end = s + src_size; |
265 mm_end = end - 15; | 263 #ifdef HAVE_MMX |
266 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 264 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
267 __asm __volatile( | 265 __asm __volatile( |
268 "movq %0, %%mm7\n\t" | 266 "movq %0, %%mm7\n\t" |
269 "movq %1, %%mm6\n\t" | 267 "movq %1, %%mm6\n\t" |
270 ::"m"(red_16mask),"m"(green_16mask)); | 268 ::"m"(red_16mask),"m"(green_16mask)); |
269 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |
271 while(s < mm_end) | 270 while(s < mm_end) |
272 { | 271 { |
273 __asm __volatile( | 272 __asm __volatile( |
274 PREFETCH" 32%1\n\t" | 273 PREFETCH" 32%1\n\t" |
275 "movd %1, %%mm0\n\t" | 274 "movd %1, %%mm0\n\t" |
301 MOVNTQ" %%mm0, %0\n\t" | 300 MOVNTQ" %%mm0, %0\n\t" |
302 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 301 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
303 d += 4; | 302 d += 4; |
304 s += 16; | 303 s += 16; |
305 } | 304 } |
305 __asm __volatile(SFENCE:::"memory"); | |
306 __asm __volatile(EMMS:::"memory"); | |
307 #endif | |
306 while(s < end) | 308 while(s < end) |
307 { | 309 { |
308 const int b= *s++; | 310 const int b= *s++; |
309 const int g= *s++; | 311 const int g= *s++; |
310 const int r= *s++; | 312 const int r= *s++; |
311 s++; | |
312 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 313 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
313 } | 314 s++; |
314 __asm __volatile(SFENCE:::"memory"); | 315 } |
315 __asm __volatile(EMMS:::"memory"); | |
316 #else | |
317 unsigned j,i,num_pixels=src_size/4; | |
318 uint16_t *d = (uint16_t *)dst; | |
319 for(i=0,j=0; j<num_pixels; i+=4,j++) | |
320 { | |
321 const int b= src[i+0]; | |
322 const int g= src[i+1]; | |
323 const int r= src[i+2]; | |
324 | |
325 d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |
326 } | |
327 #endif | |
328 } | 316 } |
329 | 317 |
330 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) | 318 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
331 { | 319 { |
332 #ifdef HAVE_MMX | |
333 const uint8_t *s = src; | 320 const uint8_t *s = src; |
334 const uint8_t *end,*mm_end; | 321 const uint8_t *end; |
322 #ifdef HAVE_MMX | |
323 const uint8_t *mm_end; | |
324 #endif | |
335 uint16_t *d = (uint16_t *)dst; | 325 uint16_t *d = (uint16_t *)dst; |
336 end = s + src_size; | 326 end = s + src_size; |
337 mm_end = end - 15; | 327 #ifdef HAVE_MMX |
338 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 328 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
339 __asm __volatile( | 329 __asm __volatile( |
340 "movq %0, %%mm7\n\t" | 330 "movq %0, %%mm7\n\t" |
341 "movq %1, %%mm6\n\t" | 331 "movq %1, %%mm6\n\t" |
342 ::"m"(red_15mask),"m"(green_15mask)); | 332 ::"m"(red_15mask),"m"(green_15mask)); |
333 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |
343 while(s < mm_end) | 334 while(s < mm_end) |
344 { | 335 { |
345 __asm __volatile( | 336 __asm __volatile( |
346 PREFETCH" 32%1\n\t" | 337 PREFETCH" 32%1\n\t" |
347 "movd %1, %%mm0\n\t" | 338 "movd %1, %%mm0\n\t" |
373 MOVNTQ" %%mm0, %0\n\t" | 364 MOVNTQ" %%mm0, %0\n\t" |
374 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 365 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
375 d += 4; | 366 d += 4; |
376 s += 16; | 367 s += 16; |
377 } | 368 } |
369 __asm __volatile(SFENCE:::"memory"); | |
370 __asm __volatile(EMMS:::"memory"); | |
371 #endif | |
378 while(s < end) | 372 while(s < end) |
379 { | 373 { |
380 const int b= *s++; | 374 const int b= *s++; |
381 const int g= *s++; | 375 const int g= *s++; |
382 const int r= *s++; | 376 const int r= *s++; |
377 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |
383 s++; | 378 s++; |
384 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 379 } |
385 } | |
386 __asm __volatile(SFENCE:::"memory"); | |
387 __asm __volatile(EMMS:::"memory"); | |
388 #else | |
389 unsigned j,i,num_pixels=src_size/4; | |
390 uint16_t *d = (uint16_t *)dst; | |
391 for(i=0,j=0; j<num_pixels; i+=4,j++) | |
392 { | |
393 const int b= src[i+0]; | |
394 const int g= src[i+1]; | |
395 const int r= src[i+2]; | |
396 | |
397 d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |
398 } | |
399 #endif | |
400 } | 380 } |
401 | 381 |
402 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) | 382 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
403 { | 383 { |
404 #ifdef HAVE_MMX | |
405 const uint8_t *s = src; | 384 const uint8_t *s = src; |
406 const uint8_t *end,*mm_end; | 385 const uint8_t *end; |
386 #ifdef HAVE_MMX | |
387 const uint8_t *mm_end; | |
388 #endif | |
407 uint16_t *d = (uint16_t *)dst; | 389 uint16_t *d = (uint16_t *)dst; |
408 end = s + src_size; | 390 end = s + src_size; |
409 mm_end = end - 11; | 391 #ifdef HAVE_MMX |
410 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 392 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
411 __asm __volatile( | 393 __asm __volatile( |
412 "movq %0, %%mm7\n\t" | 394 "movq %0, %%mm7\n\t" |
413 "movq %1, %%mm6\n\t" | 395 "movq %1, %%mm6\n\t" |
414 ::"m"(red_16mask),"m"(green_16mask)); | 396 ::"m"(red_16mask),"m"(green_16mask)); |
397 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |
415 while(s < mm_end) | 398 while(s < mm_end) |
416 { | 399 { |
417 __asm __volatile( | 400 __asm __volatile( |
418 PREFETCH" 32%1\n\t" | 401 PREFETCH" 32%1\n\t" |
419 "movd %1, %%mm0\n\t" | 402 "movd %1, %%mm0\n\t" |
445 MOVNTQ" %%mm0, %0\n\t" | 428 MOVNTQ" %%mm0, %0\n\t" |
446 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | 429 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
447 d += 4; | 430 d += 4; |
448 s += 12; | 431 s += 12; |
449 } | 432 } |
433 __asm __volatile(SFENCE:::"memory"); | |
434 __asm __volatile(EMMS:::"memory"); | |
435 #endif | |
450 while(s < end) | 436 while(s < end) |
451 { | 437 { |
452 const int b= *s++; | 438 const int b= *s++; |
453 const int g= *s++; | 439 const int g= *s++; |
454 const int r= *s++; | 440 const int r= *s++; |
455 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 441 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
456 } | 442 } |
457 __asm __volatile(SFENCE:::"memory"); | |
458 __asm __volatile(EMMS:::"memory"); | |
459 #else | |
460 unsigned j,i,num_pixels=src_size/3; | |
461 uint16_t *d = (uint16_t *)dst; | |
462 for(i=0,j=0; j<num_pixels; i+=3,j++) | |
463 { | |
464 const int b= src[i+0]; | |
465 const int g= src[i+1]; | |
466 const int r= src[i+2]; | |
467 | |
468 d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |
469 } | |
470 #endif | |
471 } | 443 } |
472 | 444 |
473 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) | 445 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
474 { | 446 { |
475 #ifdef HAVE_MMX | |
476 const uint8_t *s = src; | 447 const uint8_t *s = src; |
477 const uint8_t *end,*mm_end; | 448 const uint8_t *end; |
449 #ifdef HAVE_MMX | |
450 const uint8_t *mm_end; | |
451 #endif | |
478 uint16_t *d = (uint16_t *)dst; | 452 uint16_t *d = (uint16_t *)dst; |
479 end = s + src_size; | 453 end = s + src_size; |
480 mm_end = end -11; | 454 #ifdef HAVE_MMX |
481 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 455 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
482 __asm __volatile( | 456 __asm __volatile( |
483 "movq %0, %%mm7\n\t" | 457 "movq %0, %%mm7\n\t" |
484 "movq %1, %%mm6\n\t" | 458 "movq %1, %%mm6\n\t" |
485 ::"m"(red_15mask),"m"(green_15mask)); | 459 ::"m"(red_15mask),"m"(green_15mask)); |
460 mm_end = (uint8_t*)((((unsigned long)end)/16)*16); | |
486 while(s < mm_end) | 461 while(s < mm_end) |
487 { | 462 { |
488 __asm __volatile( | 463 __asm __volatile( |
489 PREFETCH" 32%1\n\t" | 464 PREFETCH" 32%1\n\t" |
490 "movd %1, %%mm0\n\t" | 465 "movd %1, %%mm0\n\t" |
516 MOVNTQ" %%mm0, %0\n\t" | 491 MOVNTQ" %%mm0, %0\n\t" |
517 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | 492 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
518 d += 4; | 493 d += 4; |
519 s += 12; | 494 s += 12; |
520 } | 495 } |
496 __asm __volatile(SFENCE:::"memory"); | |
497 __asm __volatile(EMMS:::"memory"); | |
498 #endif | |
521 while(s < end) | 499 while(s < end) |
522 { | 500 { |
523 const int b= *s++; | 501 const int b= *s++; |
524 const int g= *s++; | 502 const int g= *s++; |
525 const int r= *s++; | 503 const int r= *s++; |
526 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 504 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
527 } | 505 } |
506 } | |
507 | |
508 /* | |
509 Here I use a less accurate approximation by simply | |
510 left-shifting the input | |
511 value and filling the low order bits with | |
512 zeroes. This method improves png's | |
513 compression but this scheme cannot reproduce white exactly, since it does not | |
514 generate an all-ones maximum value; the net effect is to darken the | |
515 image slightly. | |
516 | |
517 The better method should be "left bit replication": | |
518 | |
519 4 3 2 1 0 | |
520 --------- | |
521 1 1 0 1 1 | |
522 | |
523 7 6 5 4 3 2 1 0 | |
524 ---------------- | |
525 1 1 0 1 1 1 1 0 | |
526 |=======| |===| | |
527 | Leftmost Bits Repeated to Fill Open Bits | |
528 | | |
529 Original Bits | |
530 */ | |
531 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
532 { | |
533 const uint16_t *end; | |
534 #ifdef HAVE_MMX | |
535 const uint16_t *mm_end; | |
536 #endif | |
537 uint8_t *d = (uint8_t *)dst; | |
538 const uint16_t *s = (uint16_t *)src; | |
539 end = s + src_size/2; | |
540 #ifdef HAVE_MMX | |
541 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
542 mm_end = (uint16_t*)((((unsigned long)end)/8)*8); | |
543 while(s < mm_end) | |
544 { | |
545 __asm __volatile( | |
546 PREFETCH" 32%1\n\t" | |
547 "movq %1, %%mm0\n\t" | |
548 "movq %1, %%mm1\n\t" | |
549 "movq %1, %%mm2\n\t" | |
550 "pand %2, %%mm0\n\t" | |
551 "pand %3, %%mm1\n\t" | |
552 "pand %4, %%mm2\n\t" | |
553 "psllq $3, %%mm0\n\t" | |
554 "psrlq $2, %%mm1\n\t" | |
555 "psrlq $7, %%mm2\n\t" | |
556 "movq %%mm0, %%mm3\n\t" | |
557 "movq %%mm1, %%mm4\n\t" | |
558 "movq %%mm2, %%mm5\n\t" | |
559 "punpcklwd %5, %%mm0\n\t" | |
560 "punpcklwd %5, %%mm1\n\t" | |
561 "punpcklwd %5, %%mm2\n\t" | |
562 "punpckhwd %5, %%mm3\n\t" | |
563 "punpckhwd %5, %%mm4\n\t" | |
564 "punpckhwd %5, %%mm5\n\t" | |
565 "psllq $8, %%mm1\n\t" | |
566 "psllq $16, %%mm2\n\t" | |
567 "por %%mm1, %%mm0\n\t" | |
568 "por %%mm2, %%mm0\n\t" | |
569 "psllq $8, %%mm4\n\t" | |
570 "psllq $16, %%mm5\n\t" | |
571 "por %%mm4, %%mm3\n\t" | |
572 "por %%mm5, %%mm3\n\t" | |
573 | |
574 "movq %%mm0, %%mm6\n\t" | |
575 "movq %%mm3, %%mm7\n\t" | |
576 | |
577 "movq 8%1, %%mm0\n\t" | |
578 "movq 8%1, %%mm1\n\t" | |
579 "movq 8%1, %%mm2\n\t" | |
580 "pand %2, %%mm0\n\t" | |
581 "pand %3, %%mm1\n\t" | |
582 "pand %4, %%mm2\n\t" | |
583 "psllq $3, %%mm0\n\t" | |
584 "psrlq $2, %%mm1\n\t" | |
585 "psrlq $7, %%mm2\n\t" | |
586 "movq %%mm0, %%mm3\n\t" | |
587 "movq %%mm1, %%mm4\n\t" | |
588 "movq %%mm2, %%mm5\n\t" | |
589 "punpcklwd %5, %%mm0\n\t" | |
590 "punpcklwd %5, %%mm1\n\t" | |
591 "punpcklwd %5, %%mm2\n\t" | |
592 "punpckhwd %5, %%mm3\n\t" | |
593 "punpckhwd %5, %%mm4\n\t" | |
594 "punpckhwd %5, %%mm5\n\t" | |
595 "psllq $8, %%mm1\n\t" | |
596 "psllq $16, %%mm2\n\t" | |
597 "por %%mm1, %%mm0\n\t" | |
598 "por %%mm2, %%mm0\n\t" | |
599 "psllq $8, %%mm4\n\t" | |
600 "psllq $16, %%mm5\n\t" | |
601 "por %%mm4, %%mm3\n\t" | |
602 "por %%mm5, %%mm3\n\t" | |
603 | |
604 :"=m"(*d) | |
605 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |
606 :"memory"); | |
607 /* Borrowed 32 to 24 */ | |
608 __asm __volatile( | |
609 "movq %%mm0, %%mm4\n\t" | |
610 "movq %%mm3, %%mm5\n\t" | |
611 "movq %%mm6, %%mm0\n\t" | |
612 "movq %%mm7, %%mm1\n\t" | |
613 | |
614 "movq %%mm4, %%mm6\n\t" | |
615 "movq %%mm5, %%mm7\n\t" | |
616 "movq %%mm0, %%mm2\n\t" | |
617 "movq %%mm1, %%mm3\n\t" | |
618 | |
619 "psrlq $8, %%mm2\n\t" | |
620 "psrlq $8, %%mm3\n\t" | |
621 "psrlq $8, %%mm6\n\t" | |
622 "psrlq $8, %%mm7\n\t" | |
623 "pand %2, %%mm0\n\t" | |
624 "pand %2, %%mm1\n\t" | |
625 "pand %2, %%mm4\n\t" | |
626 "pand %2, %%mm5\n\t" | |
627 "pand %3, %%mm2\n\t" | |
628 "pand %3, %%mm3\n\t" | |
629 "pand %3, %%mm6\n\t" | |
630 "pand %3, %%mm7\n\t" | |
631 "por %%mm2, %%mm0\n\t" | |
632 "por %%mm3, %%mm1\n\t" | |
633 "por %%mm6, %%mm4\n\t" | |
634 "por %%mm7, %%mm5\n\t" | |
635 | |
636 "movq %%mm1, %%mm2\n\t" | |
637 "movq %%mm4, %%mm3\n\t" | |
638 "psllq $48, %%mm2\n\t" | |
639 "psllq $32, %%mm3\n\t" | |
640 "pand %4, %%mm2\n\t" | |
641 "pand %5, %%mm3\n\t" | |
642 "por %%mm2, %%mm0\n\t" | |
643 "psrlq $16, %%mm1\n\t" | |
644 "psrlq $32, %%mm4\n\t" | |
645 "psllq $16, %%mm5\n\t" | |
646 "por %%mm3, %%mm1\n\t" | |
647 "pand %6, %%mm5\n\t" | |
648 "por %%mm5, %%mm4\n\t" | |
649 | |
650 MOVNTQ" %%mm0, %0\n\t" | |
651 MOVNTQ" %%mm1, 8%0\n\t" | |
652 MOVNTQ" %%mm4, 16%0" | |
653 | |
654 :"=m"(*d) | |
655 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
656 :"memory"); | |
657 d += 24; | |
658 s += 8; | |
659 } | |
528 __asm __volatile(SFENCE:::"memory"); | 660 __asm __volatile(SFENCE:::"memory"); |
529 __asm __volatile(EMMS:::"memory"); | 661 __asm __volatile(EMMS:::"memory"); |
530 #else | 662 #endif |
531 unsigned j,i,num_pixels=src_size/3; | 663 while(s < end) |
532 uint16_t *d = (uint16_t *)dst; | 664 { |
533 for(i=0,j=0; j<num_pixels; i+=3,j++) | 665 register uint16_t bgr; |
534 { | 666 bgr = *s++; |
535 const int b= src[i+0]; | 667 *d++ = (bgr&0x1F)<<3; |
536 const int g= src[i+1]; | 668 *d++ = (bgr&0x3E0)>>2; |
537 const int r= src[i+2]; | 669 *d++ = (bgr&0x7C00)>>7; |
538 | 670 } |
539 d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 671 } |
540 } | 672 |
541 #endif | 673 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size) |
674 { | |
675 const uint16_t *end; | |
676 #ifdef HAVE_MMX | |
677 const uint16_t *mm_end; | |
678 #endif | |
679 uint8_t *d = (uint8_t *)dst; | |
680 const uint16_t *s = (const uint16_t *)src; | |
681 end = s + src_size/2; | |
682 #ifdef HAVE_MMX | |
683 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
684 mm_end = (uint16_t*)((((unsigned long)end)/8)*8); | |
685 while(s < mm_end) | |
686 { | |
687 __asm __volatile( | |
688 PREFETCH" 32%1\n\t" | |
689 "movq %1, %%mm0\n\t" | |
690 "movq %1, %%mm1\n\t" | |
691 "movq %1, %%mm2\n\t" | |
692 "pand %2, %%mm0\n\t" | |
693 "pand %3, %%mm1\n\t" | |
694 "pand %4, %%mm2\n\t" | |
695 "psllq $3, %%mm0\n\t" | |
696 "psrlq $3, %%mm1\n\t" | |
697 "psrlq $8, %%mm2\n\t" | |
698 "movq %%mm0, %%mm3\n\t" | |
699 "movq %%mm1, %%mm4\n\t" | |
700 "movq %%mm2, %%mm5\n\t" | |
701 "punpcklwd %5, %%mm0\n\t" | |
702 "punpcklwd %5, %%mm1\n\t" | |
703 "punpcklwd %5, %%mm2\n\t" | |
704 "punpckhwd %5, %%mm3\n\t" | |
705 "punpckhwd %5, %%mm4\n\t" | |
706 "punpckhwd %5, %%mm5\n\t" | |
707 "psllq $8, %%mm1\n\t" | |
708 "psllq $16, %%mm2\n\t" | |
709 "por %%mm1, %%mm0\n\t" | |
710 "por %%mm2, %%mm0\n\t" | |
711 "psllq $8, %%mm4\n\t" | |
712 "psllq $16, %%mm5\n\t" | |
713 "por %%mm4, %%mm3\n\t" | |
714 "por %%mm5, %%mm3\n\t" | |
715 | |
716 "movq %%mm0, %%mm6\n\t" | |
717 "movq %%mm3, %%mm7\n\t" | |
718 | |
719 "movq 8%1, %%mm0\n\t" | |
720 "movq 8%1, %%mm1\n\t" | |
721 "movq 8%1, %%mm2\n\t" | |
722 "pand %2, %%mm0\n\t" | |
723 "pand %3, %%mm1\n\t" | |
724 "pand %4, %%mm2\n\t" | |
725 "psllq $3, %%mm0\n\t" | |
726 "psrlq $3, %%mm1\n\t" | |
727 "psrlq $8, %%mm2\n\t" | |
728 "movq %%mm0, %%mm3\n\t" | |
729 "movq %%mm1, %%mm4\n\t" | |
730 "movq %%mm2, %%mm5\n\t" | |
731 "punpcklwd %5, %%mm0\n\t" | |
732 "punpcklwd %5, %%mm1\n\t" | |
733 "punpcklwd %5, %%mm2\n\t" | |
734 "punpckhwd %5, %%mm3\n\t" | |
735 "punpckhwd %5, %%mm4\n\t" | |
736 "punpckhwd %5, %%mm5\n\t" | |
737 "psllq $8, %%mm1\n\t" | |
738 "psllq $16, %%mm2\n\t" | |
739 "por %%mm1, %%mm0\n\t" | |
740 "por %%mm2, %%mm0\n\t" | |
741 "psllq $8, %%mm4\n\t" | |
742 "psllq $16, %%mm5\n\t" | |
743 "por %%mm4, %%mm3\n\t" | |
744 "por %%mm5, %%mm3\n\t" | |
745 :"=m"(*d) | |
746 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |
747 :"memory"); | |
748 /* Borrowed 32 to 24 */ | |
749 __asm __volatile( | |
750 "movq %%mm0, %%mm4\n\t" | |
751 "movq %%mm3, %%mm5\n\t" | |
752 "movq %%mm6, %%mm0\n\t" | |
753 "movq %%mm7, %%mm1\n\t" | |
754 | |
755 "movq %%mm4, %%mm6\n\t" | |
756 "movq %%mm5, %%mm7\n\t" | |
757 "movq %%mm0, %%mm2\n\t" | |
758 "movq %%mm1, %%mm3\n\t" | |
759 | |
760 "psrlq $8, %%mm2\n\t" | |
761 "psrlq $8, %%mm3\n\t" | |
762 "psrlq $8, %%mm6\n\t" | |
763 "psrlq $8, %%mm7\n\t" | |
764 "pand %2, %%mm0\n\t" | |
765 "pand %2, %%mm1\n\t" | |
766 "pand %2, %%mm4\n\t" | |
767 "pand %2, %%mm5\n\t" | |
768 "pand %3, %%mm2\n\t" | |
769 "pand %3, %%mm3\n\t" | |
770 "pand %3, %%mm6\n\t" | |
771 "pand %3, %%mm7\n\t" | |
772 "por %%mm2, %%mm0\n\t" | |
773 "por %%mm3, %%mm1\n\t" | |
774 "por %%mm6, %%mm4\n\t" | |
775 "por %%mm7, %%mm5\n\t" | |
776 | |
777 "movq %%mm1, %%mm2\n\t" | |
778 "movq %%mm4, %%mm3\n\t" | |
779 "psllq $48, %%mm2\n\t" | |
780 "psllq $32, %%mm3\n\t" | |
781 "pand %4, %%mm2\n\t" | |
782 "pand %5, %%mm3\n\t" | |
783 "por %%mm2, %%mm0\n\t" | |
784 "psrlq $16, %%mm1\n\t" | |
785 "psrlq $32, %%mm4\n\t" | |
786 "psllq $16, %%mm5\n\t" | |
787 "por %%mm3, %%mm1\n\t" | |
788 "pand %6, %%mm5\n\t" | |
789 "por %%mm5, %%mm4\n\t" | |
790 | |
791 MOVNTQ" %%mm0, %0\n\t" | |
792 MOVNTQ" %%mm1, 8%0\n\t" | |
793 MOVNTQ" %%mm4, 16%0" | |
794 | |
795 :"=m"(*d) | |
796 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
797 :"memory"); | |
798 d += 24; | |
799 s += 8; | |
800 } | |
801 __asm __volatile(SFENCE:::"memory"); | |
802 __asm __volatile(EMMS:::"memory"); | |
803 #endif | |
804 while(s < end) | |
805 { | |
806 register uint16_t bgr; | |
807 bgr = *s++; | |
808 *d++ = (bgr&0x1F)<<3; | |
809 *d++ = (bgr&0x7E0)>>3; | |
810 *d++ = (bgr&0xF800)>>8; | |
811 } | |
812 } | |
813 | |
814 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
815 { | |
816 const uint16_t *end; | |
817 #ifdef HAVE_MMX | |
818 const uint16_t *mm_end; | |
819 #endif | |
820 uint8_t *d = (uint8_t *)dst; | |
821 const uint16_t *s = (const uint16_t *)src; | |
822 end = s + src_size/2; | |
823 #ifdef HAVE_MMX | |
824 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
825 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
826 mm_end = (uint16_t*)((((unsigned long)end)/4)*4); | |
827 while(s < mm_end) | |
828 { | |
829 __asm __volatile( | |
830 PREFETCH" 32%1\n\t" | |
831 "movq %1, %%mm0\n\t" | |
832 "movq %1, %%mm1\n\t" | |
833 "movq %1, %%mm2\n\t" | |
834 "pand %2, %%mm0\n\t" | |
835 "pand %3, %%mm1\n\t" | |
836 "pand %4, %%mm2\n\t" | |
837 "psllq $3, %%mm0\n\t" | |
838 "psrlq $2, %%mm1\n\t" | |
839 "psrlq $7, %%mm2\n\t" | |
840 "movq %%mm0, %%mm3\n\t" | |
841 "movq %%mm1, %%mm4\n\t" | |
842 "movq %%mm2, %%mm5\n\t" | |
843 "punpcklwd %%mm7, %%mm0\n\t" | |
844 "punpcklwd %%mm7, %%mm1\n\t" | |
845 "punpcklwd %%mm7, %%mm2\n\t" | |
846 "punpckhwd %%mm7, %%mm3\n\t" | |
847 "punpckhwd %%mm7, %%mm4\n\t" | |
848 "punpckhwd %%mm7, %%mm5\n\t" | |
849 "psllq $8, %%mm1\n\t" | |
850 "psllq $16, %%mm2\n\t" | |
851 "por %%mm1, %%mm0\n\t" | |
852 "por %%mm2, %%mm0\n\t" | |
853 "psllq $8, %%mm4\n\t" | |
854 "psllq $16, %%mm5\n\t" | |
855 "por %%mm4, %%mm3\n\t" | |
856 "por %%mm5, %%mm3\n\t" | |
857 MOVNTQ" %%mm0, %0\n\t" | |
858 MOVNTQ" %%mm3, 8%0\n\t" | |
859 :"=m"(*d) | |
860 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |
861 :"memory"); | |
862 d += 16; | |
863 s += 4; | |
864 } | |
865 __asm __volatile(SFENCE:::"memory"); | |
866 __asm __volatile(EMMS:::"memory"); | |
867 #endif | |
868 while(s < end) | |
869 { | |
870 register uint16_t bgr; | |
871 bgr = *s++; | |
872 *d++ = (bgr&0x1F)<<3; | |
873 *d++ = (bgr&0x3E0)>>2; | |
874 *d++ = (bgr&0x7C00)>>7; | |
875 *d++ = 0; | |
876 } | |
877 } | |
878 | |
879 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size) | |
880 { | |
881 const uint16_t *end; | |
882 #ifdef HAVE_MMX | |
883 const uint16_t *mm_end; | |
884 #endif | |
885 uint8_t *d = (uint8_t *)dst; | |
886 const uint16_t *s = (uint16_t *)src; | |
887 end = s + src_size/2; | |
888 #ifdef HAVE_MMX | |
889 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
890 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
891 mm_end = (uint16_t*)((((unsigned long)end)/4)*4); | |
892 while(s < mm_end) | |
893 { | |
894 __asm __volatile( | |
895 PREFETCH" 32%1\n\t" | |
896 "movq %1, %%mm0\n\t" | |
897 "movq %1, %%mm1\n\t" | |
898 "movq %1, %%mm2\n\t" | |
899 "pand %2, %%mm0\n\t" | |
900 "pand %3, %%mm1\n\t" | |
901 "pand %4, %%mm2\n\t" | |
902 "psllq $3, %%mm0\n\t" | |
903 "psrlq $3, %%mm1\n\t" | |
904 "psrlq $8, %%mm2\n\t" | |
905 "movq %%mm0, %%mm3\n\t" | |
906 "movq %%mm1, %%mm4\n\t" | |
907 "movq %%mm2, %%mm5\n\t" | |
908 "punpcklwd %%mm7, %%mm0\n\t" | |
909 "punpcklwd %%mm7, %%mm1\n\t" | |
910 "punpcklwd %%mm7, %%mm2\n\t" | |
911 "punpckhwd %%mm7, %%mm3\n\t" | |
912 "punpckhwd %%mm7, %%mm4\n\t" | |
913 "punpckhwd %%mm7, %%mm5\n\t" | |
914 "psllq $8, %%mm1\n\t" | |
915 "psllq $16, %%mm2\n\t" | |
916 "por %%mm1, %%mm0\n\t" | |
917 "por %%mm2, %%mm0\n\t" | |
918 "psllq $8, %%mm4\n\t" | |
919 "psllq $16, %%mm5\n\t" | |
920 "por %%mm4, %%mm3\n\t" | |
921 "por %%mm5, %%mm3\n\t" | |
922 MOVNTQ" %%mm0, %0\n\t" | |
923 MOVNTQ" %%mm3, 8%0\n\t" | |
924 :"=m"(*d) | |
925 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |
926 :"memory"); | |
927 d += 16; | |
928 s += 4; | |
929 } | |
930 __asm __volatile(SFENCE:::"memory"); | |
931 __asm __volatile(EMMS:::"memory"); | |
932 #endif | |
933 while(s < end) | |
934 { | |
935 register uint16_t bgr; | |
936 bgr = *s++; | |
937 *d++ = (bgr&0x1F)<<3; | |
938 *d++ = (bgr&0x7E0)>>3; | |
939 *d++ = (bgr&0xF800)>>8; | |
940 *d++ = 0; | |
941 } | |
542 } | 942 } |
543 | 943 |
544 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) | 944 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
545 { | 945 { |
546 #ifdef HAVE_MMX | 946 #ifdef HAVE_MMX |
947 /* TODO: unroll this loop */ | |
547 asm volatile ( | 948 asm volatile ( |
548 "xorl %%eax, %%eax \n\t" | 949 "xorl %%eax, %%eax \n\t" |
549 ".balign 16 \n\t" | 950 ".balign 16 \n\t" |
550 "1: \n\t" | 951 "1: \n\t" |
551 PREFETCH" 32(%0, %%eax) \n\t" | 952 PREFETCH" 32(%0, %%eax) \n\t" |
552 "movq (%0, %%eax), %%mm0 \n\t" | 953 "movq (%0, %%eax), %%mm0 \n\t" |
553 "movq %%mm0, %%mm1 \n\t" | 954 "movq %%mm0, %%mm1 \n\t" |
554 "movq %%mm0, %%mm2 \n\t" | 955 "movq %%mm0, %%mm2 \n\t" |
555 "pslld $16, %%mm0 \n\t" | 956 "pslld $16, %%mm0 \n\t" |
556 "psrld $16, %%mm1 \n\t" | 957 "psrld $16, %%mm1 \n\t" |
557 "pand "MANGLE(mask32r)", %%mm0 \n\t" | 958 "pand "MANGLE(mask32r)", %%mm0 \n\t" |
558 "pand "MANGLE(mask32g)", %%mm2 \n\t" | 959 "pand "MANGLE(mask32g)", %%mm2 \n\t" |
559 "pand "MANGLE(mask32b)", %%mm1 \n\t" | 960 "pand "MANGLE(mask32b)", %%mm1 \n\t" |
560 "por %%mm0, %%mm2 \n\t" | 961 "por %%mm0, %%mm2 \n\t" |
561 "por %%mm1, %%mm2 \n\t" | 962 "por %%mm1, %%mm2 \n\t" |
562 MOVNTQ" %%mm2, (%1, %%eax) \n\t" | 963 MOVNTQ" %%mm2, (%1, %%eax) \n\t" |
563 "addl $8, %%eax \n\t" | 964 "addl $8, %%eax \n\t" |
564 "cmpl %2, %%eax \n\t" | 965 "cmpl %2, %%eax \n\t" |
568 ); | 969 ); |
569 | 970 |
570 __asm __volatile(SFENCE:::"memory"); | 971 __asm __volatile(SFENCE:::"memory"); |
571 __asm __volatile(EMMS:::"memory"); | 972 __asm __volatile(EMMS:::"memory"); |
572 #else | 973 #else |
573 int i; | 974 unsigned i; |
574 int num_pixels= src_size >> 2; | 975 unsigned num_pixels = src_size >> 2; |
575 for(i=0; i<num_pixels; i++) | 976 for(i=0; i<num_pixels; i++) |
576 { | 977 { |
577 dst[4*i + 0] = src[4*i + 2]; | 978 dst[4*i + 0] = src[4*i + 2]; |
578 dst[4*i + 1] = src[4*i + 1]; | 979 dst[4*i + 1] = src[4*i + 1]; |
579 dst[4*i + 2] = src[4*i + 0]; | 980 dst[4*i + 2] = src[4*i + 0]; |
581 #endif | 982 #endif |
582 } | 983 } |
583 | 984 |
584 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) | 985 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) |
585 { | 986 { |
586 int i; | 987 unsigned i; |
587 #ifdef HAVE_MMX | 988 #ifdef HAVE_MMX |
588 int mmx_size= 23 - src_size; | 989 int mmx_size= 23 - src_size; |
589 asm volatile ( | 990 asm volatile ( |
590 "movq "MANGLE(mask24r)", %%mm5 \n\t" | 991 "movq "MANGLE(mask24r)", %%mm5 \n\t" |
591 "movq "MANGLE(mask24g)", %%mm6 \n\t" | 992 "movq "MANGLE(mask24g)", %%mm6 \n\t" |
629 | 1030 |
630 __asm __volatile(SFENCE:::"memory"); | 1031 __asm __volatile(SFENCE:::"memory"); |
631 __asm __volatile(EMMS:::"memory"); | 1032 __asm __volatile(EMMS:::"memory"); |
632 | 1033 |
634 if(mmx_size==23) return; //finished, was multiple of 8 | 1034 if(mmx_size==23) return; //finished, was multiple of 8 |
1035 | |
634 src+= src_size; | 1036 src+= src_size; |
635 dst+= src_size; | 1037 dst+= src_size; |
636 src_size= 23 - mmx_size; | 1038 src_size= 23-mmx_size; |
637 src-= src_size; | 1039 src-= src_size; |
638 dst-= src_size; | 1040 dst-= src_size; |
639 #endif | 1041 #endif |
640 for(i=0; i<src_size; i+=3) | 1042 for(i=0; i<src_size; i+=3) |
641 { | 1043 { |
642 register int x; | 1044 register uint8_t x; |
643 x = src[i + 2]; | 1045 x = src[i + 2]; |
644 dst[i + 1] = src[i + 1]; | 1046 dst[i + 1] = src[i + 1]; |
645 dst[i + 2] = src[i + 0]; | 1047 dst[i + 2] = src[i + 0]; |
646 dst[i + 0] = x; | 1048 dst[i + 0] = x; |
647 } | 1049 } |
649 | 1051 |
650 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | 1052 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
651 unsigned int width, unsigned int height, | 1053 unsigned int width, unsigned int height, |
652 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) | 1054 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) |
653 { | 1055 { |
654 int y; | 1056 unsigned y; |
655 const int chromWidth= width>>1; | 1057 const unsigned chromWidth= width>>1; |
656 for(y=0; y<height; y++) | 1058 for(y=0; y<height; y++) |
657 { | 1059 { |
658 #ifdef HAVE_MMX | 1060 #ifdef HAVE_MMX |
659 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) | 1061 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) |
660 asm volatile( | 1062 asm volatile( |
689 " jb 1b \n\t" | 1091 " jb 1b \n\t" |
690 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth) | 1092 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth) |
691 : "%eax" | 1093 : "%eax" |
692 ); | 1094 ); |
693 #else | 1095 #else |
1096 #if __WORDSIZE >= 64 | |
694 int i; | 1097 int i; |
695 for(i=0; i<chromWidth; i++) | 1098 uint64_t *ldst = (uint64_t *) dst; |
696 { | 1099 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
697 dst[4*i+0] = ysrc[2*i+0]; | 1100 for(i = 0; i < chromWidth; i += 2){ |
698 dst[4*i+1] = usrc[i]; | 1101 uint64_t k, l; |
699 dst[4*i+2] = ysrc[2*i+1]; | 1102 k = yc[0] + (uc[0] << 8) + |
700 dst[4*i+3] = vsrc[i]; | 1103 (yc[1] << 16) + (vc[0] << 24); |
1104 l = yc[2] + (uc[1] << 8) + | |
1105 (yc[3] << 16) + (vc[1] << 24); | |
1106 *ldst++ = k + (l << 32); | |
1107 yc += 4; | |
1108 uc += 2; | |
1109 vc += 2; | |
701 } | 1110 } |
1111 | |
1112 #else | |
1113 int i, *idst = (int32_t *) dst; | |
1114 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1115 for(i = 0; i < chromWidth; i++){ | |
1116 *idst++ = yc[0] + (uc[0] << 8) + | |
1117 (yc[1] << 16) + (vc[0] << 24); | |
1118 yc += 2; | |
1119 uc++; | |
1120 vc++; | |
1121 } | |
1122 #endif | |
702 #endif | 1123 #endif |
703 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) | 1124 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) |
704 { | 1125 { |
705 usrc += chromStride; | 1126 usrc += chromStride; |
706 vsrc += chromStride; | 1127 vsrc += chromStride; |
746 */ | 1167 */ |
747 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1168 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
748 unsigned int width, unsigned int height, | 1169 unsigned int width, unsigned int height, |
749 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | 1170 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) |
750 { | 1171 { |
751 int y; | 1172 unsigned y; |
752 const int chromWidth= width>>1; | 1173 const unsigned chromWidth= width>>1; |
753 for(y=0; y<height; y+=2) | 1174 for(y=0; y<height; y+=2) |
754 { | 1175 { |
755 #ifdef HAVE_MMX | 1176 #ifdef HAVE_MMX |
756 asm volatile( | 1177 asm volatile( |
757 "xorl %%eax, %%eax \n\t" | 1178 "xorl %%eax, %%eax \n\t" |
833 | 1254 |
834 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) | 1255 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) |
835 : "memory", "%eax" | 1256 : "memory", "%eax" |
836 ); | 1257 ); |
837 #else | 1258 #else |
838 int i; | 1259 unsigned i; |
839 for(i=0; i<chromWidth; i++) | 1260 for(i=0; i<chromWidth; i++) |
840 { | 1261 { |
841 ydst[2*i+0] = src[4*i+0]; | 1262 ydst[2*i+0] = src[4*i+0]; |
842 udst[i] = src[4*i+1]; | 1263 udst[i] = src[4*i+1]; |
843 ydst[2*i+1] = src[4*i+2]; | 1264 ydst[2*i+1] = src[4*i+2]; |
882 */ | 1303 */ |
883 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1304 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
884 unsigned int width, unsigned int height, | 1305 unsigned int width, unsigned int height, |
885 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | 1306 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) |
886 { | 1307 { |
887 int y; | 1308 unsigned y; |
888 const int chromWidth= width>>1; | 1309 const unsigned chromWidth= width>>1; |
889 for(y=0; y<height; y+=2) | 1310 for(y=0; y<height; y+=2) |
890 { | 1311 { |
891 #ifdef HAVE_MMX | 1312 #ifdef HAVE_MMX |
892 asm volatile( | 1313 asm volatile( |
893 "xorl %%eax, %%eax \n\t" | 1314 "xorl %%eax, %%eax \n\t" |
969 | 1390 |
970 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) | 1391 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) |
971 : "memory", "%eax" | 1392 : "memory", "%eax" |
972 ); | 1393 ); |
973 #else | 1394 #else |
974 int i; | 1395 unsigned i; |
975 for(i=0; i<chromWidth; i++) | 1396 for(i=0; i<chromWidth; i++) |
976 { | 1397 { |
977 udst[i] = src[4*i+0]; | 1398 udst[i] = src[4*i+0]; |
978 ydst[2*i+0] = src[4*i+1]; | 1399 ydst[2*i+0] = src[4*i+1]; |
979 vdst[i] = src[4*i+2]; | 1400 vdst[i] = src[4*i+2]; |
1008 */ | 1429 */ |
1009 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | 1430 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
1010 unsigned int width, unsigned int height, | 1431 unsigned int width, unsigned int height, |
1011 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) | 1432 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) |
1012 { | 1433 { |
1013 int y; | 1434 unsigned y; |
1014 const int chromWidth= width>>1; | 1435 const unsigned chromWidth= width>>1; |
1015 #ifdef HAVE_MMX | 1436 #ifdef HAVE_MMX |
1016 for(y=0; y<height-2; y+=2) | 1437 for(y=0; y<height-2; y+=2) |
1017 { | 1438 { |
1018 int i; | 1439 unsigned i; |
1019 for(i=0; i<2; i++) | 1440 for(i=0; i<2; i++) |
1020 { | 1441 { |
1021 asm volatile( | 1442 asm volatile( |
1022 "movl %2, %%eax \n\t" | 1443 "movl %2, %%eax \n\t" |
1023 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | 1444 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1252 #else | 1673 #else |
1253 y=0; | 1674 y=0; |
1254 #endif | 1675 #endif |
1255 for(; y<height; y+=2) | 1676 for(; y<height; y+=2) |
1256 { | 1677 { |
1257 int i; | 1678 unsigned i; |
1258 for(i=0; i<chromWidth; i++) | 1679 for(i=0; i<chromWidth; i++) |
1259 { | 1680 { |
1260 unsigned int b= src[6*i+0]; | 1681 unsigned int b= src[6*i+0]; |
1261 unsigned int g= src[6*i+1]; | 1682 unsigned int g= src[6*i+1]; |
1262 unsigned int r= src[6*i+2]; | 1683 unsigned int r= src[6*i+2]; |
1302 src += srcStride; | 1723 src += srcStride; |
1303 } | 1724 } |
1304 } | 1725 } |
1305 | 1726 |
1306 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | 1727 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, |
1307 int width, int height, int src1Stride, int src2Stride, int dstStride){ | 1728 unsigned width, unsigned height, unsigned src1Stride, |
1308 int h; | 1729 unsigned src2Stride, unsigned dstStride){ |
1730 unsigned h; | |
1309 | 1731 |
1310 for(h=0; h < height; h++) | 1732 for(h=0; h < height; h++) |
1311 { | 1733 { |
1312 int w; | 1734 unsigned w; |
1313 | 1735 |
1314 #ifdef HAVE_MMX | 1736 #ifdef HAVE_MMX |
1315 #ifdef HAVE_SSE2 | 1737 #ifdef HAVE_SSE2 |
1316 asm( | 1738 asm( |
1317 "xorl %%eax, %%eax \n\t" | 1739 "xorl %%eax, %%eax \n\t" |