comparison postproc/rgb2rgb_template.c @ 6492:e7635c03910f

sync with mplayer xp - partial yvu9 support (copy only) - rgb 15/16 -> 24/32 converters - int->unsigned changes
author arpi
date Sat, 22 Jun 2002 08:49:45 +0000
parents c5cf988c6d6f
children f98313dcd428
comparison
equal deleted inserted replaced
6491:920796b6c7b1 6492:e7635c03910f
5 * Software YUV to YUV convertor 5 * Software YUV to YUV convertor
6 * Software YUV to RGB convertor 6 * Software YUV to RGB convertor
7 * Written by Nick Kurshev. 7 * Written by Nick Kurshev.
8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) 8 * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL)
9 */ 9 */
10
11 #include <stddef.h>
12 #include <inttypes.h> /* for __WORDSIZE */
13
14 #ifndef __WORDSIZE
15 #warning You have misconfigured system and probably will lose performance!
16 #endif
10 17
11 #undef PREFETCH 18 #undef PREFETCH
12 #undef MOVNTQ 19 #undef MOVNTQ
13 #undef EMMS 20 #undef EMMS
14 #undef SFENCE 21 #undef SFENCE
54 { 61 {
55 uint8_t *dest = dst; 62 uint8_t *dest = dst;
56 const uint8_t *s = src; 63 const uint8_t *s = src;
57 const uint8_t *end; 64 const uint8_t *end;
58 #ifdef HAVE_MMX 65 #ifdef HAVE_MMX
59 const uint8_t *mm_end; 66 uint8_t *mm_end;
60 #endif 67 #endif
61 end = s + src_size; 68 end = s + src_size;
62 #ifdef HAVE_MMX 69 #ifdef HAVE_MMX
63 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); 70 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
64 mm_end = end - 23;
65 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); 71 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
72 mm_end = (uint8_t*)((((unsigned long)end)/24)*24);
66 while(s < mm_end) 73 while(s < mm_end)
67 { 74 {
68 __asm __volatile( 75 __asm __volatile(
69 PREFETCH" 32%1\n\t" 76 PREFETCH" 32%1\n\t"
70 "movd %1, %%mm0\n\t" 77 "movd %1, %%mm0\n\t"
105 { 112 {
106 uint8_t *dest = dst; 113 uint8_t *dest = dst;
107 const uint8_t *s = src; 114 const uint8_t *s = src;
108 const uint8_t *end; 115 const uint8_t *end;
109 #ifdef HAVE_MMX 116 #ifdef HAVE_MMX
110 const uint8_t *mm_end; 117 uint8_t *mm_end;
111 #endif 118 #endif
112 end = s + src_size; 119 end = s + src_size;
113 #ifdef HAVE_MMX 120 #ifdef HAVE_MMX
114 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); 121 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
115 mm_end = end - 31; 122 mm_end = (uint8_t*)((((unsigned long)end)/32)*32);
116 while(s < mm_end) 123 while(s < mm_end)
117 { 124 {
118 __asm __volatile( 125 __asm __volatile(
119 PREFETCH" 32%1\n\t" 126 PREFETCH" 32%1\n\t"
120 "movq %1, %%mm0\n\t" 127 "movq %1, %%mm0\n\t"
184 MMX2, 3DNOW optimization by Nick Kurshev 191 MMX2, 3DNOW optimization by Nick Kurshev
185 32bit c version, and and&add trick by Michael Niedermayer 192 32bit c version, and and&add trick by Michael Niedermayer
186 */ 193 */
187 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) 194 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size)
188 { 195 {
189 #ifdef HAVE_MMX 196 register const uint8_t* s=src;
190 register int offs=15-src_size; 197 register uint8_t* d=dst;
191 register const char* s=src-offs; 198 register const uint8_t *end;
192 register char* d=dst-offs; 199 uint8_t *mm_end;
193 __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); 200 end = s + src_size;
194 __asm __volatile( 201 #ifdef HAVE_MMX
195 "movq %0, %%mm4\n\t" 202 __asm __volatile(PREFETCH" %0"::"m"(*s));
196 ::"m"(mask15s)); 203 __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
197 while(offs<0) 204 mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
205 while(s<mm_end)
198 { 206 {
199 __asm __volatile( 207 __asm __volatile(
200 PREFETCH" 32%1\n\t" 208 PREFETCH" 32%1\n\t"
201 "movq %1, %%mm0\n\t" 209 "movq %1, %%mm0\n\t"
202 "movq 8%1, %%mm2\n\t" 210 "movq 8%1, %%mm2\n\t"
206 "pand %%mm4, %%mm2\n\t" 214 "pand %%mm4, %%mm2\n\t"
207 "paddw %%mm1, %%mm0\n\t" 215 "paddw %%mm1, %%mm0\n\t"
208 "paddw %%mm3, %%mm2\n\t" 216 "paddw %%mm3, %%mm2\n\t"
209 MOVNTQ" %%mm0, %0\n\t" 217 MOVNTQ" %%mm0, %0\n\t"
210 MOVNTQ" %%mm2, 8%0" 218 MOVNTQ" %%mm2, 8%0"
211 :"=m"(*(d+offs)) 219 :"=m"(*d)
212 :"m"(*(s+offs)) 220 :"m"(*s)
213 ); 221 );
214 offs+=16; 222 d+=16;
223 s+=16;
215 } 224 }
216 __asm __volatile(SFENCE:::"memory"); 225 __asm __volatile(SFENCE:::"memory");
217 __asm __volatile(EMMS:::"memory"); 226 __asm __volatile(EMMS:::"memory");
218 #else 227 #endif
219 #if 0 228 mm_end = (uint8_t*)((((unsigned long)end)/4)*4);
220 const uint16_t *s1=( uint16_t * )src; 229 while(s < mm_end)
221 uint16_t *d1=( uint16_t * )dst; 230 {
222 uint16_t *e=((uint8_t *)s1)+src_size; 231 register unsigned x= *((uint32_t *)s);
223 while( s1<e ){ 232 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
224 register int x=*( s1++ ); 233 d+=4;
225 /* rrrrrggggggbbbbb 234 s+=4;
226 0rrrrrgggggbbbbb 235 }
227 0111 1111 1110 0000=0x7FE0 236 if(s < end)
228 00000000000001 1111=0x001F */ 237 {
229 *( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 ); 238 register unsigned short x= *((uint16_t *)s);
230 } 239 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
231 #else 240 }
232 const unsigned *s1=( unsigned * )src;
233 unsigned *d1=( unsigned * )dst;
234 int i;
235 int size= src_size>>2;
236 for(i=0; i<size; i++)
237 {
238 register int x= s1[i];
239 // d1[i] = x + (x&0x7FE07FE0); //faster but need msbit =0 which might not allways be true
240 d1[i] = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
241
242 }
243 #endif
244 #endif
245 } 241 }
246 242
247 static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size) 243 static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
248 { 244 {
249 unsigned j,i,num_pixels=src_size/3; 245 unsigned j,i,num_pixels=src_size/3;
255 } 251 }
256 } 252 }
257 253
258 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) 254 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
259 { 255 {
260 #ifdef HAVE_MMX
261 const uint8_t *s = src; 256 const uint8_t *s = src;
262 const uint8_t *end,*mm_end; 257 const uint8_t *end;
258 #ifdef HAVE_MMX
259 const uint8_t *mm_end;
260 #endif
263 uint16_t *d = (uint16_t *)dst; 261 uint16_t *d = (uint16_t *)dst;
264 end = s + src_size; 262 end = s + src_size;
265 mm_end = end - 15; 263 #ifdef HAVE_MMX
266 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 264 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
267 __asm __volatile( 265 __asm __volatile(
268 "movq %0, %%mm7\n\t" 266 "movq %0, %%mm7\n\t"
269 "movq %1, %%mm6\n\t" 267 "movq %1, %%mm6\n\t"
270 ::"m"(red_16mask),"m"(green_16mask)); 268 ::"m"(red_16mask),"m"(green_16mask));
269 mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
271 while(s < mm_end) 270 while(s < mm_end)
272 { 271 {
273 __asm __volatile( 272 __asm __volatile(
274 PREFETCH" 32%1\n\t" 273 PREFETCH" 32%1\n\t"
275 "movd %1, %%mm0\n\t" 274 "movd %1, %%mm0\n\t"
301 MOVNTQ" %%mm0, %0\n\t" 300 MOVNTQ" %%mm0, %0\n\t"
302 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); 301 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
303 d += 4; 302 d += 4;
304 s += 16; 303 s += 16;
305 } 304 }
305 __asm __volatile(SFENCE:::"memory");
306 __asm __volatile(EMMS:::"memory");
307 #endif
306 while(s < end) 308 while(s < end)
307 { 309 {
308 const int b= *s++; 310 const int b= *s++;
309 const int g= *s++; 311 const int g= *s++;
310 const int r= *s++; 312 const int r= *s++;
311 s++;
312 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); 313 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
313 } 314 s++;
314 __asm __volatile(SFENCE:::"memory"); 315 }
315 __asm __volatile(EMMS:::"memory");
316 #else
317 unsigned j,i,num_pixels=src_size/4;
318 uint16_t *d = (uint16_t *)dst;
319 for(i=0,j=0; j<num_pixels; i+=4,j++)
320 {
321 const int b= src[i+0];
322 const int g= src[i+1];
323 const int r= src[i+2];
324
325 d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
326 }
327 #endif
328 } 316 }
329 317
330 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) 318 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
331 { 319 {
332 #ifdef HAVE_MMX
333 const uint8_t *s = src; 320 const uint8_t *s = src;
334 const uint8_t *end,*mm_end; 321 const uint8_t *end;
322 #ifdef HAVE_MMX
323 const uint8_t *mm_end;
324 #endif
335 uint16_t *d = (uint16_t *)dst; 325 uint16_t *d = (uint16_t *)dst;
336 end = s + src_size; 326 end = s + src_size;
337 mm_end = end - 15; 327 #ifdef HAVE_MMX
338 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 328 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
339 __asm __volatile( 329 __asm __volatile(
340 "movq %0, %%mm7\n\t" 330 "movq %0, %%mm7\n\t"
341 "movq %1, %%mm6\n\t" 331 "movq %1, %%mm6\n\t"
342 ::"m"(red_15mask),"m"(green_15mask)); 332 ::"m"(red_15mask),"m"(green_15mask));
333 mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
343 while(s < mm_end) 334 while(s < mm_end)
344 { 335 {
345 __asm __volatile( 336 __asm __volatile(
346 PREFETCH" 32%1\n\t" 337 PREFETCH" 32%1\n\t"
347 "movd %1, %%mm0\n\t" 338 "movd %1, %%mm0\n\t"
373 MOVNTQ" %%mm0, %0\n\t" 364 MOVNTQ" %%mm0, %0\n\t"
374 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); 365 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
375 d += 4; 366 d += 4;
376 s += 16; 367 s += 16;
377 } 368 }
369 __asm __volatile(SFENCE:::"memory");
370 __asm __volatile(EMMS:::"memory");
371 #endif
378 while(s < end) 372 while(s < end)
379 { 373 {
380 const int b= *s++; 374 const int b= *s++;
381 const int g= *s++; 375 const int g= *s++;
382 const int r= *s++; 376 const int r= *s++;
377 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
383 s++; 378 s++;
384 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); 379 }
385 }
386 __asm __volatile(SFENCE:::"memory");
387 __asm __volatile(EMMS:::"memory");
388 #else
389 unsigned j,i,num_pixels=src_size/4;
390 uint16_t *d = (uint16_t *)dst;
391 for(i=0,j=0; j<num_pixels; i+=4,j++)
392 {
393 const int b= src[i+0];
394 const int g= src[i+1];
395 const int r= src[i+2];
396
397 d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
398 }
399 #endif
400 } 380 }
401 381
402 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) 382 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
403 { 383 {
404 #ifdef HAVE_MMX
405 const uint8_t *s = src; 384 const uint8_t *s = src;
406 const uint8_t *end,*mm_end; 385 const uint8_t *end;
386 #ifdef HAVE_MMX
387 const uint8_t *mm_end;
388 #endif
407 uint16_t *d = (uint16_t *)dst; 389 uint16_t *d = (uint16_t *)dst;
408 end = s + src_size; 390 end = s + src_size;
409 mm_end = end - 11; 391 #ifdef HAVE_MMX
410 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 392 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
411 __asm __volatile( 393 __asm __volatile(
412 "movq %0, %%mm7\n\t" 394 "movq %0, %%mm7\n\t"
413 "movq %1, %%mm6\n\t" 395 "movq %1, %%mm6\n\t"
414 ::"m"(red_16mask),"m"(green_16mask)); 396 ::"m"(red_16mask),"m"(green_16mask));
397 mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
415 while(s < mm_end) 398 while(s < mm_end)
416 { 399 {
417 __asm __volatile( 400 __asm __volatile(
418 PREFETCH" 32%1\n\t" 401 PREFETCH" 32%1\n\t"
419 "movd %1, %%mm0\n\t" 402 "movd %1, %%mm0\n\t"
445 MOVNTQ" %%mm0, %0\n\t" 428 MOVNTQ" %%mm0, %0\n\t"
446 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); 429 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
447 d += 4; 430 d += 4;
448 s += 12; 431 s += 12;
449 } 432 }
433 __asm __volatile(SFENCE:::"memory");
434 __asm __volatile(EMMS:::"memory");
435 #endif
450 while(s < end) 436 while(s < end)
451 { 437 {
452 const int b= *s++; 438 const int b= *s++;
453 const int g= *s++; 439 const int g= *s++;
454 const int r= *s++; 440 const int r= *s++;
455 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); 441 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
456 } 442 }
457 __asm __volatile(SFENCE:::"memory");
458 __asm __volatile(EMMS:::"memory");
459 #else
460 unsigned j,i,num_pixels=src_size/3;
461 uint16_t *d = (uint16_t *)dst;
462 for(i=0,j=0; j<num_pixels; i+=3,j++)
463 {
464 const int b= src[i+0];
465 const int g= src[i+1];
466 const int r= src[i+2];
467
468 d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
469 }
470 #endif
471 } 443 }
472 444
473 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) 445 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
474 { 446 {
475 #ifdef HAVE_MMX
476 const uint8_t *s = src; 447 const uint8_t *s = src;
477 const uint8_t *end,*mm_end; 448 const uint8_t *end;
449 #ifdef HAVE_MMX
450 const uint8_t *mm_end;
451 #endif
478 uint16_t *d = (uint16_t *)dst; 452 uint16_t *d = (uint16_t *)dst;
479 end = s + src_size; 453 end = s + src_size;
480 mm_end = end -11; 454 #ifdef HAVE_MMX
481 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 455 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
482 __asm __volatile( 456 __asm __volatile(
483 "movq %0, %%mm7\n\t" 457 "movq %0, %%mm7\n\t"
484 "movq %1, %%mm6\n\t" 458 "movq %1, %%mm6\n\t"
485 ::"m"(red_15mask),"m"(green_15mask)); 459 ::"m"(red_15mask),"m"(green_15mask));
460 mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
486 while(s < mm_end) 461 while(s < mm_end)
487 { 462 {
488 __asm __volatile( 463 __asm __volatile(
489 PREFETCH" 32%1\n\t" 464 PREFETCH" 32%1\n\t"
490 "movd %1, %%mm0\n\t" 465 "movd %1, %%mm0\n\t"
516 MOVNTQ" %%mm0, %0\n\t" 491 MOVNTQ" %%mm0, %0\n\t"
517 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); 492 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
518 d += 4; 493 d += 4;
519 s += 12; 494 s += 12;
520 } 495 }
496 __asm __volatile(SFENCE:::"memory");
497 __asm __volatile(EMMS:::"memory");
498 #endif
521 while(s < end) 499 while(s < end)
522 { 500 {
523 const int b= *s++; 501 const int b= *s++;
524 const int g= *s++; 502 const int g= *s++;
525 const int r= *s++; 503 const int r= *s++;
526 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); 504 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
527 } 505 }
506 }
507
508 /*
509 I use here less accurate approximation by simply
510 left-shifting the input
511 value and filling the low order bits with
512 zeroes. This method improves png's
513 compression but this scheme cannot reproduce white exactly, since it does not
514 generate an all-ones maximum value; the net effect is to darken the
515 image slightly.
516
517 The better method should be "left bit replication":
518
519 4 3 2 1 0
520 ---------
521 1 1 0 1 1
522
523 7 6 5 4 3 2 1 0
524 ----------------
525 1 1 0 1 1 1 1 0
526 |=======| |===|
527 | Leftmost Bits Repeated to Fill Open Bits
528 |
529 Original Bits
530 */
/*
 * Convert 15bpp BGR555 to packed 24bpp BGR.
 * src_size is in bytes; each input pixel (2 bytes) expands to 3 output bytes,
 * low-order bits are zero-filled (see the note above about bit replication).
 */
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = dst;
	const uint16_t *s = (const uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
	/* Each MMX iteration consumes 8 input pixels (16 bytes) and emits 24
	   bytes.  Stop 7 pixels before the end instead of rounding the end
	   ADDRESS down: the address-based bound does not keep the last 16-byte
	   load / 24-byte store inside the buffers. */
	mm_end = end - 7;
	while(s < mm_end)
	{
	    /* split 8 pixels into per-component words, widen to dwords */
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq	%1, %%mm0\n\t"
		"movq	%1, %%mm1\n\t"
		"movq	%1, %%mm2\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%3, %%mm1\n\t"
		"pand	%4, %%mm2\n\t"
		"psllq	$3, %%mm0\n\t"
		"psrlq	$2, %%mm1\n\t"
		"psrlq	$7, %%mm2\n\t"
		"movq	%%mm0, %%mm3\n\t"
		"movq	%%mm1, %%mm4\n\t"
		"movq	%%mm2, %%mm5\n\t"
		"punpcklwd %5, %%mm0\n\t"
		"punpcklwd %5, %%mm1\n\t"
		"punpcklwd %5, %%mm2\n\t"
		"punpckhwd %5, %%mm3\n\t"
		"punpckhwd %5, %%mm4\n\t"
		"punpckhwd %5, %%mm5\n\t"
		"psllq	$8, %%mm1\n\t"
		"psllq	$16, %%mm2\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psllq	$8, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm5, %%mm3\n\t"

		"movq	%%mm0, %%mm6\n\t"
		"movq	%%mm3, %%mm7\n\t"

		"movq	8%1, %%mm0\n\t"
		"movq	8%1, %%mm1\n\t"
		"movq	8%1, %%mm2\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%3, %%mm1\n\t"
		"pand	%4, %%mm2\n\t"
		"psllq	$3, %%mm0\n\t"
		"psrlq	$2, %%mm1\n\t"
		"psrlq	$7, %%mm2\n\t"
		"movq	%%mm0, %%mm3\n\t"
		"movq	%%mm1, %%mm4\n\t"
		"movq	%%mm2, %%mm5\n\t"
		"punpcklwd %5, %%mm0\n\t"
		"punpcklwd %5, %%mm1\n\t"
		"punpcklwd %5, %%mm2\n\t"
		"punpckhwd %5, %%mm3\n\t"
		"punpckhwd %5, %%mm4\n\t"
		"punpckhwd %5, %%mm5\n\t"
		"psllq	$8, %%mm1\n\t"
		"psllq	$16, %%mm2\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psllq	$8, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm5, %%mm3\n\t"

		:"=m"(*d)
		:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
		:"memory");
	    /* Borrowed 32 to 24 */
	    __asm __volatile(
		"movq	%%mm0, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"movq	%%mm6, %%mm0\n\t"
		"movq	%%mm7, %%mm1\n\t"

		"movq	%%mm4, %%mm6\n\t"
		"movq	%%mm5, %%mm7\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm1, %%mm3\n\t"

		"psrlq	$8, %%mm2\n\t"
		"psrlq	$8, %%mm3\n\t"
		"psrlq	$8, %%mm6\n\t"
		"psrlq	$8, %%mm7\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%2, %%mm1\n\t"
		"pand	%2, %%mm4\n\t"
		"pand	%2, %%mm5\n\t"
		"pand	%3, %%mm2\n\t"
		"pand	%3, %%mm3\n\t"
		"pand	%3, %%mm6\n\t"
		"pand	%3, %%mm7\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm3, %%mm1\n\t"
		"por	%%mm6, %%mm4\n\t"
		"por	%%mm7, %%mm5\n\t"

		"movq	%%mm1, %%mm2\n\t"
		"movq	%%mm4, %%mm3\n\t"
		"psllq	$48, %%mm2\n\t"
		"psllq	$32, %%mm3\n\t"
		"pand	%4, %%mm2\n\t"
		"pand	%5, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psrlq	$16, %%mm1\n\t"
		"psrlq	$32, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm3, %%mm1\n\t"
		"pand	%6, %%mm5\n\t"
		"por	%%mm5, %%mm4\n\t"

		MOVNTQ"	%%mm0, %0\n\t"
		MOVNTQ"	%%mm1, 8%0\n\t"
		MOVNTQ"	%%mm4, 16%0"

		:"=m"(*d)
		:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
		:"memory");
	    d += 24;
	    s += 8;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar tail (and whole conversion without MMX) */
	while(s < end)
	{
		register uint16_t bgr;
		bgr = *s++;
		*d++ = (bgr&0x1F)<<3;
		*d++ = (bgr&0x3E0)>>2;
		*d++ = (bgr&0x7C00)>>7;
	}
}
/*
 * Convert 16bpp BGR565 to packed 24bpp BGR.
 * src_size is in bytes; each input pixel (2 bytes) expands to 3 output bytes,
 * low-order bits are zero-filled.
 */
static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = dst;
	const uint16_t *s = (const uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
	/* Each MMX iteration consumes 8 input pixels (16 bytes) and emits 24
	   bytes, so stop 7 pixels early; rounding the end ADDRESS down does not
	   keep the last load/store inside the buffers. */
	mm_end = end - 7;
	while(s < mm_end)
	{
	    /* split 8 pixels into per-component words, widen to dwords */
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq	%1, %%mm0\n\t"
		"movq	%1, %%mm1\n\t"
		"movq	%1, %%mm2\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%3, %%mm1\n\t"
		"pand	%4, %%mm2\n\t"
		"psllq	$3, %%mm0\n\t"
		"psrlq	$3, %%mm1\n\t"
		"psrlq	$8, %%mm2\n\t"
		"movq	%%mm0, %%mm3\n\t"
		"movq	%%mm1, %%mm4\n\t"
		"movq	%%mm2, %%mm5\n\t"
		"punpcklwd %5, %%mm0\n\t"
		"punpcklwd %5, %%mm1\n\t"
		"punpcklwd %5, %%mm2\n\t"
		"punpckhwd %5, %%mm3\n\t"
		"punpckhwd %5, %%mm4\n\t"
		"punpckhwd %5, %%mm5\n\t"
		"psllq	$8, %%mm1\n\t"
		"psllq	$16, %%mm2\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psllq	$8, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm5, %%mm3\n\t"

		"movq	%%mm0, %%mm6\n\t"
		"movq	%%mm3, %%mm7\n\t"

		"movq	8%1, %%mm0\n\t"
		"movq	8%1, %%mm1\n\t"
		"movq	8%1, %%mm2\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%3, %%mm1\n\t"
		"pand	%4, %%mm2\n\t"
		"psllq	$3, %%mm0\n\t"
		"psrlq	$3, %%mm1\n\t"
		"psrlq	$8, %%mm2\n\t"
		"movq	%%mm0, %%mm3\n\t"
		"movq	%%mm1, %%mm4\n\t"
		"movq	%%mm2, %%mm5\n\t"
		"punpcklwd %5, %%mm0\n\t"
		"punpcklwd %5, %%mm1\n\t"
		"punpcklwd %5, %%mm2\n\t"
		"punpckhwd %5, %%mm3\n\t"
		"punpckhwd %5, %%mm4\n\t"
		"punpckhwd %5, %%mm5\n\t"
		"psllq	$8, %%mm1\n\t"
		"psllq	$16, %%mm2\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psllq	$8, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm5, %%mm3\n\t"
		:"=m"(*d)
		:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
		:"memory");
	    /* Borrowed 32 to 24 */
	    __asm __volatile(
		"movq	%%mm0, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"movq	%%mm6, %%mm0\n\t"
		"movq	%%mm7, %%mm1\n\t"

		"movq	%%mm4, %%mm6\n\t"
		"movq	%%mm5, %%mm7\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm1, %%mm3\n\t"

		"psrlq	$8, %%mm2\n\t"
		"psrlq	$8, %%mm3\n\t"
		"psrlq	$8, %%mm6\n\t"
		"psrlq	$8, %%mm7\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%2, %%mm1\n\t"
		"pand	%2, %%mm4\n\t"
		"pand	%2, %%mm5\n\t"
		"pand	%3, %%mm2\n\t"
		"pand	%3, %%mm3\n\t"
		"pand	%3, %%mm6\n\t"
		"pand	%3, %%mm7\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm3, %%mm1\n\t"
		"por	%%mm6, %%mm4\n\t"
		"por	%%mm7, %%mm5\n\t"

		"movq	%%mm1, %%mm2\n\t"
		"movq	%%mm4, %%mm3\n\t"
		"psllq	$48, %%mm2\n\t"
		"psllq	$32, %%mm3\n\t"
		"pand	%4, %%mm2\n\t"
		"pand	%5, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psrlq	$16, %%mm1\n\t"
		"psrlq	$32, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm3, %%mm1\n\t"
		"pand	%6, %%mm5\n\t"
		"por	%%mm5, %%mm4\n\t"

		MOVNTQ"	%%mm0, %0\n\t"
		MOVNTQ"	%%mm1, 8%0\n\t"
		MOVNTQ"	%%mm4, 16%0"

		:"=m"(*d)
		:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
		:"memory");
	    d += 24;
	    s += 8;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar tail (and whole conversion without MMX) */
	while(s < end)
	{
		register uint16_t bgr;
		bgr = *s++;
		*d++ = (bgr&0x1F)<<3;
		*d++ = (bgr&0x7E0)>>3;
		*d++ = (bgr&0xF800)>>8;
	}
}
813
/*
 * Convert 15bpp BGR555 to 32bpp (BGR + zero pad byte).
 * src_size is in bytes; each input pixel (2 bytes) expands to 4 output bytes,
 * low-order bits are zero-filled and the 4th byte is written as 0.
 */
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = dst;
	const uint16_t *s = (const uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
	__asm __volatile("pxor	%%mm7,%%mm7\n\t":::"memory");
	/* Each MMX iteration consumes 4 input pixels (one 8-byte movq), so stop
	   3 pixels early; rounding the end ADDRESS down to a multiple of 4 does
	   not keep the last 8-byte load / 16-byte store inside the buffers. */
	mm_end = end - 3;
	while(s < mm_end)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq	%1, %%mm0\n\t"
			"movq	%1, %%mm1\n\t"
			"movq	%1, %%mm2\n\t"
			"pand	%2, %%mm0\n\t"
			"pand	%3, %%mm1\n\t"
			"pand	%4, %%mm2\n\t"
			"psllq	$3, %%mm0\n\t"
			"psrlq	$2, %%mm1\n\t"
			"psrlq	$7, %%mm2\n\t"
			"movq	%%mm0, %%mm3\n\t"
			"movq	%%mm1, %%mm4\n\t"
			"movq	%%mm2, %%mm5\n\t"
			"punpcklwd %%mm7, %%mm0\n\t"
			"punpcklwd %%mm7, %%mm1\n\t"
			"punpcklwd %%mm7, %%mm2\n\t"
			"punpckhwd %%mm7, %%mm3\n\t"
			"punpckhwd %%mm7, %%mm4\n\t"
			"punpckhwd %%mm7, %%mm5\n\t"
			"psllq	$8, %%mm1\n\t"
			"psllq	$16, %%mm2\n\t"
			"por	%%mm1, %%mm0\n\t"
			"por	%%mm2, %%mm0\n\t"
			"psllq	$8, %%mm4\n\t"
			"psllq	$16, %%mm5\n\t"
			"por	%%mm4, %%mm3\n\t"
			"por	%%mm5, %%mm3\n\t"
			MOVNTQ"	%%mm0, %0\n\t"
			MOVNTQ"	%%mm3, 8%0\n\t"
			:"=m"(*d)
			:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
			:"memory");
		d += 16;
		s += 4;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar tail (and whole conversion without MMX) */
	while(s < end)
	{
		register uint16_t bgr;
		bgr = *s++;
		*d++ = (bgr&0x1F)<<3;
		*d++ = (bgr&0x3E0)>>2;
		*d++ = (bgr&0x7C00)>>7;
		*d++ = 0;
	}
}
878
/*
 * Convert 16bpp BGR565 to 32bpp (BGR + zero pad byte).
 * src_size is in bytes; each input pixel (2 bytes) expands to 4 output bytes,
 * low-order bits are zero-filled and the 4th byte is written as 0.
 */
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = dst;
	const uint16_t *s = (const uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
	__asm __volatile("pxor	%%mm7,%%mm7\n\t":::"memory");
	/* Each MMX iteration consumes 4 input pixels (one 8-byte movq), so stop
	   3 pixels early; rounding the end ADDRESS down to a multiple of 4 does
	   not keep the last 8-byte load / 16-byte store inside the buffers. */
	mm_end = end - 3;
	while(s < mm_end)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq	%1, %%mm0\n\t"
			"movq	%1, %%mm1\n\t"
			"movq	%1, %%mm2\n\t"
			"pand	%2, %%mm0\n\t"
			"pand	%3, %%mm1\n\t"
			"pand	%4, %%mm2\n\t"
			"psllq	$3, %%mm0\n\t"
			"psrlq	$3, %%mm1\n\t"
			"psrlq	$8, %%mm2\n\t"
			"movq	%%mm0, %%mm3\n\t"
			"movq	%%mm1, %%mm4\n\t"
			"movq	%%mm2, %%mm5\n\t"
			"punpcklwd %%mm7, %%mm0\n\t"
			"punpcklwd %%mm7, %%mm1\n\t"
			"punpcklwd %%mm7, %%mm2\n\t"
			"punpckhwd %%mm7, %%mm3\n\t"
			"punpckhwd %%mm7, %%mm4\n\t"
			"punpckhwd %%mm7, %%mm5\n\t"
			"psllq	$8, %%mm1\n\t"
			"psllq	$16, %%mm2\n\t"
			"por	%%mm1, %%mm0\n\t"
			"por	%%mm2, %%mm0\n\t"
			"psllq	$8, %%mm4\n\t"
			"psllq	$16, %%mm5\n\t"
			"por	%%mm4, %%mm3\n\t"
			"por	%%mm5, %%mm3\n\t"
			MOVNTQ"	%%mm0, %0\n\t"
			MOVNTQ"	%%mm3, 8%0\n\t"
			:"=m"(*d)
			:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
			:"memory");
		d += 16;
		s += 4;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar tail (and whole conversion without MMX) */
	while(s < end)
	{
		register uint16_t bgr;
		bgr = *s++;
		*d++ = (bgr&0x1F)<<3;
		*d++ = (bgr&0x7E0)>>3;
		*d++ = (bgr&0xF800)>>8;
		*d++ = 0;
	}
}
543 943
544 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) 944 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
545 { 945 {
546 #ifdef HAVE_MMX 946 #ifdef HAVE_MMX
947 /* TODO: unroll this loop */
547 asm volatile ( 948 asm volatile (
548 "xorl %%eax, %%eax \n\t" 949 "xorl %%eax, %%eax \n\t"
549 ".balign 16 \n\t" 950 ".balign 16 \n\t"
550 "1: \n\t" 951 "1: \n\t"
551 PREFETCH" 32(%0, %%eax) \n\t" 952 PREFETCH" 32(%0, %%eax) \n\t"
552 "movq (%0, %%eax), %%mm0 \n\t" 953 "movq (%0, %%eax), %%mm0 \n\t"
553 "movq %%mm0, %%mm1 \n\t" 954 "movq %%mm0, %%mm1 \n\t"
554 "movq %%mm0, %%mm2 \n\t" 955 "movq %%mm0, %%mm2 \n\t"
555 "pslld $16, %%mm0 \n\t" 956 "pslld $16, %%mm0 \n\t"
556 "psrld $16, %%mm1 \n\t" 957 "psrld $16, %%mm1 \n\t"
557 "pand "MANGLE(mask32r)", %%mm0 \n\t" 958 "pand "MANGLE(mask32r)", %%mm0 \n\t"
558 "pand "MANGLE(mask32g)", %%mm2 \n\t" 959 "pand "MANGLE(mask32g)", %%mm2 \n\t"
559 "pand "MANGLE(mask32b)", %%mm1 \n\t" 960 "pand "MANGLE(mask32b)", %%mm1 \n\t"
560 "por %%mm0, %%mm2 \n\t" 961 "por %%mm0, %%mm2 \n\t"
561 "por %%mm1, %%mm2 \n\t" 962 "por %%mm1, %%mm2 \n\t"
562 MOVNTQ" %%mm2, (%1, %%eax) \n\t" 963 MOVNTQ" %%mm2, (%1, %%eax) \n\t"
563 "addl $8, %%eax \n\t" 964 "addl $8, %%eax \n\t"
564 "cmpl %2, %%eax \n\t" 965 "cmpl %2, %%eax \n\t"
568 ); 969 );
569 970
570 __asm __volatile(SFENCE:::"memory"); 971 __asm __volatile(SFENCE:::"memory");
571 __asm __volatile(EMMS:::"memory"); 972 __asm __volatile(EMMS:::"memory");
572 #else 973 #else
573 int i; 974 unsigned i;
574 int num_pixels= src_size >> 2; 975 unsigned num_pixels = src_size >> 2;
575 for(i=0; i<num_pixels; i++) 976 for(i=0; i<num_pixels; i++)
576 { 977 {
577 dst[4*i + 0] = src[4*i + 2]; 978 dst[4*i + 0] = src[4*i + 2];
578 dst[4*i + 1] = src[4*i + 1]; 979 dst[4*i + 1] = src[4*i + 1];
579 dst[4*i + 2] = src[4*i + 0]; 980 dst[4*i + 2] = src[4*i + 0];
581 #endif 982 #endif
582 } 983 }
583 984
584 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) 985 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
585 { 986 {
586 int i; 987 unsigned i;
587 #ifdef HAVE_MMX 988 #ifdef HAVE_MMX
588 int mmx_size= 23 - src_size; 989 int mmx_size= 23 - src_size;
589 asm volatile ( 990 asm volatile (
590 "movq "MANGLE(mask24r)", %%mm5 \n\t" 991 "movq "MANGLE(mask24r)", %%mm5 \n\t"
591 "movq "MANGLE(mask24g)", %%mm6 \n\t" 992 "movq "MANGLE(mask24g)", %%mm6 \n\t"
629 1030
630 __asm __volatile(SFENCE:::"memory"); 1031 __asm __volatile(SFENCE:::"memory");
631 __asm __volatile(EMMS:::"memory"); 1032 __asm __volatile(EMMS:::"memory");
632 1033
633 if(mmx_size==23) return; //finihsed, was multiple of 8 1034 if(mmx_size==23) return; //finihsed, was multiple of 8
1035
634 src+= src_size; 1036 src+= src_size;
635 dst+= src_size; 1037 dst+= src_size;
636 src_size= 23 - mmx_size; 1038 src_size= 23-mmx_size;
637 src-= src_size; 1039 src-= src_size;
638 dst-= src_size; 1040 dst-= src_size;
639 #endif 1041 #endif
640 for(i=0; i<src_size; i+=3) 1042 for(i=0; i<src_size; i+=3)
641 { 1043 {
642 register int x; 1044 register uint8_t x;
643 x = src[i + 2]; 1045 x = src[i + 2];
644 dst[i + 1] = src[i + 1]; 1046 dst[i + 1] = src[i + 1];
645 dst[i + 2] = src[i + 0]; 1047 dst[i + 2] = src[i + 0];
646 dst[i + 0] = x; 1048 dst[i + 0] = x;
647 } 1049 }
649 1051
650 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 1052 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
651 unsigned int width, unsigned int height, 1053 unsigned int width, unsigned int height,
652 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) 1054 unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma)
653 { 1055 {
654 int y; 1056 unsigned y;
655 const int chromWidth= width>>1; 1057 const unsigned chromWidth= width>>1;
656 for(y=0; y<height; y++) 1058 for(y=0; y<height; y++)
657 { 1059 {
658 #ifdef HAVE_MMX 1060 #ifdef HAVE_MMX
659 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) 1061 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
660 asm volatile( 1062 asm volatile(
689 " jb 1b \n\t" 1091 " jb 1b \n\t"
690 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth) 1092 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth)
691 : "%eax" 1093 : "%eax"
692 ); 1094 );
693 #else 1095 #else
1096 #if __WORDSIZE >= 64
694 int i; 1097 int i;
695 for(i=0; i<chromWidth; i++) 1098 uint64_t *ldst = (uint64_t *) dst;
696 { 1099 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
697 dst[4*i+0] = ysrc[2*i+0]; 1100 for(i = 0; i < chromWidth; i += 2){
698 dst[4*i+1] = usrc[i]; 1101 uint64_t k, l;
699 dst[4*i+2] = ysrc[2*i+1]; 1102 k = yc[0] + (uc[0] << 8) +
700 dst[4*i+3] = vsrc[i]; 1103 (yc[1] << 16) + (vc[0] << 24);
1104 l = yc[2] + (uc[1] << 8) +
1105 (yc[3] << 16) + (vc[1] << 24);
1106 *ldst++ = k + (l << 32);
1107 yc += 4;
1108 uc += 2;
1109 vc += 2;
701 } 1110 }
1111
1112 #else
1113 int i, *idst = (int32_t *) dst;
1114 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1115 for(i = 0; i < chromWidth; i++){
1116 *idst++ = yc[0] + (uc[0] << 8) +
1117 (yc[1] << 16) + (vc[0] << 24);
1118 yc += 2;
1119 uc++;
1120 vc++;
1121 }
1122 #endif
702 #endif 1123 #endif
703 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) 1124 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
704 { 1125 {
705 usrc += chromStride; 1126 usrc += chromStride;
706 vsrc += chromStride; 1127 vsrc += chromStride;
746 */ 1167 */
747 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1168 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
748 unsigned int width, unsigned int height, 1169 unsigned int width, unsigned int height,
749 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) 1170 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
750 { 1171 {
751 int y; 1172 unsigned y;
752 const int chromWidth= width>>1; 1173 const unsigned chromWidth= width>>1;
753 for(y=0; y<height; y+=2) 1174 for(y=0; y<height; y+=2)
754 { 1175 {
755 #ifdef HAVE_MMX 1176 #ifdef HAVE_MMX
756 asm volatile( 1177 asm volatile(
757 "xorl %%eax, %%eax \n\t" 1178 "xorl %%eax, %%eax \n\t"
833 1254
834 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) 1255 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
835 : "memory", "%eax" 1256 : "memory", "%eax"
836 ); 1257 );
837 #else 1258 #else
838 int i; 1259 unsigned i;
839 for(i=0; i<chromWidth; i++) 1260 for(i=0; i<chromWidth; i++)
840 { 1261 {
841 ydst[2*i+0] = src[4*i+0]; 1262 ydst[2*i+0] = src[4*i+0];
842 udst[i] = src[4*i+1]; 1263 udst[i] = src[4*i+1];
843 ydst[2*i+1] = src[4*i+2]; 1264 ydst[2*i+1] = src[4*i+2];
882 */ 1303 */
883 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1304 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
884 unsigned int width, unsigned int height, 1305 unsigned int width, unsigned int height,
885 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) 1306 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
886 { 1307 {
887 int y; 1308 unsigned y;
888 const int chromWidth= width>>1; 1309 const unsigned chromWidth= width>>1;
889 for(y=0; y<height; y+=2) 1310 for(y=0; y<height; y+=2)
890 { 1311 {
891 #ifdef HAVE_MMX 1312 #ifdef HAVE_MMX
892 asm volatile( 1313 asm volatile(
893 "xorl %%eax, %%eax \n\t" 1314 "xorl %%eax, %%eax \n\t"
969 1390
970 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth) 1391 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
971 : "memory", "%eax" 1392 : "memory", "%eax"
972 ); 1393 );
973 #else 1394 #else
974 int i; 1395 unsigned i;
975 for(i=0; i<chromWidth; i++) 1396 for(i=0; i<chromWidth; i++)
976 { 1397 {
977 udst[i] = src[4*i+0]; 1398 udst[i] = src[4*i+0];
978 ydst[2*i+0] = src[4*i+1]; 1399 ydst[2*i+0] = src[4*i+1];
979 vdst[i] = src[4*i+2]; 1400 vdst[i] = src[4*i+2];
1008 */ 1429 */
1009 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 1430 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1010 unsigned int width, unsigned int height, 1431 unsigned int width, unsigned int height,
1011 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) 1432 unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
1012 { 1433 {
1013 int y; 1434 unsigned y;
1014 const int chromWidth= width>>1; 1435 const unsigned chromWidth= width>>1;
1015 #ifdef HAVE_MMX 1436 #ifdef HAVE_MMX
1016 for(y=0; y<height-2; y+=2) 1437 for(y=0; y<height-2; y+=2)
1017 { 1438 {
1018 int i; 1439 unsigned i;
1019 for(i=0; i<2; i++) 1440 for(i=0; i<2; i++)
1020 { 1441 {
1021 asm volatile( 1442 asm volatile(
1022 "movl %2, %%eax \n\t" 1443 "movl %2, %%eax \n\t"
1023 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" 1444 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
1252 #else 1673 #else
1253 y=0; 1674 y=0;
1254 #endif 1675 #endif
1255 for(; y<height; y+=2) 1676 for(; y<height; y+=2)
1256 { 1677 {
1257 int i; 1678 unsigned i;
1258 for(i=0; i<chromWidth; i++) 1679 for(i=0; i<chromWidth; i++)
1259 { 1680 {
1260 unsigned int b= src[6*i+0]; 1681 unsigned int b= src[6*i+0];
1261 unsigned int g= src[6*i+1]; 1682 unsigned int g= src[6*i+1];
1262 unsigned int r= src[6*i+2]; 1683 unsigned int r= src[6*i+2];
1302 src += srcStride; 1723 src += srcStride;
1303 } 1724 }
1304 } 1725 }
1305 1726
1306 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, 1727 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
1307 int width, int height, int src1Stride, int src2Stride, int dstStride){ 1728 unsigned width, unsigned height, unsigned src1Stride,
1308 int h; 1729 unsigned src2Stride, unsigned dstStride){
1730 unsigned h;
1309 1731
1310 for(h=0; h < height; h++) 1732 for(h=0; h < height; h++)
1311 { 1733 {
1312 int w; 1734 unsigned w;
1313 1735
1314 #ifdef HAVE_MMX 1736 #ifdef HAVE_MMX
1315 #ifdef HAVE_SSE2 1737 #ifdef HAVE_SSE2
1316 asm( 1738 asm(
1317 "xorl %%eax, %%eax \n\t" 1739 "xorl %%eax, %%eax \n\t"