comparison postproc/rgb2rgb_template.c @ 6096:f38c7228a094

Fixing end-overwrite bugs (some of them, at least). This needs testing; untested parts should be reverted before release. I tested 24->32, 15->16, bgr24->rgb, and bgr32->rgb (retesting those isn't a bad idea either ...).
author michael
date Wed, 15 May 2002 02:27:41 +0000
parents f4f3cfcd0d64
children c5cf988c6d6f
comparison
equal deleted inserted replaced
6095:891cff8aba60 6096:f38c7228a094
54 { 54 {
55 uint8_t *dest = dst; 55 uint8_t *dest = dst;
56 const uint8_t *s = src; 56 const uint8_t *s = src;
57 const uint8_t *end; 57 const uint8_t *end;
58 #ifdef HAVE_MMX 58 #ifdef HAVE_MMX
59 uint8_t *mm_end; 59 const uint8_t *mm_end;
60 #endif 60 #endif
61 end = s + src_size; 61 end = s + src_size;
62 #ifdef HAVE_MMX 62 #ifdef HAVE_MMX
63 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); 63 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
64 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*4))*(MMREG_SIZE*4)); 64 mm_end = end - 23;
65 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); 65 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
66 if(mm_end == end) mm_end -= MMREG_SIZE*4;
67 while(s < mm_end) 66 while(s < mm_end)
68 { 67 {
69 __asm __volatile( 68 __asm __volatile(
70 PREFETCH" 32%1\n\t" 69 PREFETCH" 32%1\n\t"
71 "movd %1, %%mm0\n\t" 70 "movd %1, %%mm0\n\t"
106 { 105 {
107 uint8_t *dest = dst; 106 uint8_t *dest = dst;
108 const uint8_t *s = src; 107 const uint8_t *s = src;
109 const uint8_t *end; 108 const uint8_t *end;
110 #ifdef HAVE_MMX 109 #ifdef HAVE_MMX
111 uint8_t *mm_end; 110 const uint8_t *mm_end;
112 #endif 111 #endif
113 end = s + src_size; 112 end = s + src_size;
114 #ifdef HAVE_MMX 113 #ifdef HAVE_MMX
115 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); 114 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
116 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*4))*(MMREG_SIZE*4)); 115 mm_end = end - 31;
117 while(s < mm_end) 116 while(s < mm_end)
118 { 117 {
119 __asm __volatile( 118 __asm __volatile(
120 PREFETCH" 32%1\n\t" 119 PREFETCH" 32%1\n\t"
121 "movq %1, %%mm0\n\t" 120 "movq %1, %%mm0\n\t"
186 32bit c version, and and&add trick by Michael Niedermayer 185 32bit c version, and and&add trick by Michael Niedermayer
187 */ 186 */
188 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) 187 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size)
189 { 188 {
190 #ifdef HAVE_MMX 189 #ifdef HAVE_MMX
191 register const char* s=src+src_size; 190 register int offs=15-src_size;
192 register char* d=dst+src_size; 191 register const char* s=src-offs;
193 register int offs=-src_size; 192 register char* d=dst-offs;
194 __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); 193 __asm __volatile(PREFETCH" %0"::"m"(*(s+offs)));
195 __asm __volatile( 194 __asm __volatile(
196 "movq %0, %%mm4\n\t" 195 "movq %0, %%mm4\n\t"
197 ::"m"(mask15s)); 196 ::"m"(mask15s));
198 while(offs<0) 197 while(offs<0)
250 #ifdef HAVE_MMX 249 #ifdef HAVE_MMX
251 const uint8_t *s = src; 250 const uint8_t *s = src;
252 const uint8_t *end,*mm_end; 251 const uint8_t *end,*mm_end;
253 uint16_t *d = (uint16_t *)dst; 252 uint16_t *d = (uint16_t *)dst;
254 end = s + src_size; 253 end = s + src_size;
255 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); 254 mm_end = end - 15;
256 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 255 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
257 __asm __volatile( 256 __asm __volatile(
258 "movq %0, %%mm7\n\t" 257 "movq %0, %%mm7\n\t"
259 "movq %1, %%mm6\n\t" 258 "movq %1, %%mm6\n\t"
260 ::"m"(red_16mask),"m"(green_16mask)); 259 ::"m"(red_16mask),"m"(green_16mask));
296 while(s < end) 295 while(s < end)
297 { 296 {
298 const int b= *s++; 297 const int b= *s++;
299 const int g= *s++; 298 const int g= *s++;
300 const int r= *s++; 299 const int r= *s++;
300 s++;
301 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); 301 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
302 } 302 }
303 __asm __volatile(SFENCE:::"memory"); 303 __asm __volatile(SFENCE:::"memory");
304 __asm __volatile(EMMS:::"memory"); 304 __asm __volatile(EMMS:::"memory");
305 #else 305 #else
321 #ifdef HAVE_MMX 321 #ifdef HAVE_MMX
322 const uint8_t *s = src; 322 const uint8_t *s = src;
323 const uint8_t *end,*mm_end; 323 const uint8_t *end,*mm_end;
324 uint16_t *d = (uint16_t *)dst; 324 uint16_t *d = (uint16_t *)dst;
325 end = s + src_size; 325 end = s + src_size;
326 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); 326 mm_end = end - 15;
327 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 327 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
328 __asm __volatile( 328 __asm __volatile(
329 "movq %0, %%mm7\n\t" 329 "movq %0, %%mm7\n\t"
330 "movq %1, %%mm6\n\t" 330 "movq %1, %%mm6\n\t"
331 ::"m"(red_15mask),"m"(green_15mask)); 331 ::"m"(red_15mask),"m"(green_15mask));
367 while(s < end) 367 while(s < end)
368 { 368 {
369 const int b= *s++; 369 const int b= *s++;
370 const int g= *s++; 370 const int g= *s++;
371 const int r= *s++; 371 const int r= *s++;
372 s++;
372 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); 373 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
373 } 374 }
374 __asm __volatile(SFENCE:::"memory"); 375 __asm __volatile(SFENCE:::"memory");
375 __asm __volatile(EMMS:::"memory"); 376 __asm __volatile(EMMS:::"memory");
376 #else 377 #else
392 #ifdef HAVE_MMX 393 #ifdef HAVE_MMX
393 const uint8_t *s = src; 394 const uint8_t *s = src;
394 const uint8_t *end,*mm_end; 395 const uint8_t *end,*mm_end;
395 uint16_t *d = (uint16_t *)dst; 396 uint16_t *d = (uint16_t *)dst;
396 end = s + src_size; 397 end = s + src_size;
397 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); 398 mm_end = end - 11;
398 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 399 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
399 __asm __volatile( 400 __asm __volatile(
400 "movq %0, %%mm7\n\t" 401 "movq %0, %%mm7\n\t"
401 "movq %1, %%mm6\n\t" 402 "movq %1, %%mm6\n\t"
402 ::"m"(red_16mask),"m"(green_16mask)); 403 ::"m"(red_16mask),"m"(green_16mask));
403 if(mm_end == end) mm_end -= MMREG_SIZE*2;
404 while(s < mm_end) 404 while(s < mm_end)
405 { 405 {
406 __asm __volatile( 406 __asm __volatile(
407 PREFETCH" 32%1\n\t" 407 PREFETCH" 32%1\n\t"
408 "movd %1, %%mm0\n\t" 408 "movd %1, %%mm0\n\t"
464 #ifdef HAVE_MMX 464 #ifdef HAVE_MMX
465 const uint8_t *s = src; 465 const uint8_t *s = src;
466 const uint8_t *end,*mm_end; 466 const uint8_t *end,*mm_end;
467 uint16_t *d = (uint16_t *)dst; 467 uint16_t *d = (uint16_t *)dst;
468 end = s + src_size; 468 end = s + src_size;
469 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); 469 mm_end = end -11;
470 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 470 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
471 __asm __volatile( 471 __asm __volatile(
472 "movq %0, %%mm7\n\t" 472 "movq %0, %%mm7\n\t"
473 "movq %1, %%mm6\n\t" 473 "movq %1, %%mm6\n\t"
474 ::"m"(red_15mask),"m"(green_15mask)); 474 ::"m"(red_15mask),"m"(green_15mask));
475 if(mm_end == end) mm_end -= MMREG_SIZE*2;
476 while(s < mm_end) 475 while(s < mm_end)
477 { 476 {
478 __asm __volatile( 477 __asm __volatile(
479 PREFETCH" 32%1\n\t" 478 PREFETCH" 32%1\n\t"
480 "movd %1, %%mm0\n\t" 479 "movd %1, %%mm0\n\t"
548 "pand "MANGLE(mask32g)", %%mm2 \n\t" 547 "pand "MANGLE(mask32g)", %%mm2 \n\t"
549 "pand "MANGLE(mask32b)", %%mm1 \n\t" 548 "pand "MANGLE(mask32b)", %%mm1 \n\t"
550 "por %%mm0, %%mm2 \n\t" 549 "por %%mm0, %%mm2 \n\t"
551 "por %%mm1, %%mm2 \n\t" 550 "por %%mm1, %%mm2 \n\t"
552 MOVNTQ" %%mm2, (%1, %%eax) \n\t" 551 MOVNTQ" %%mm2, (%1, %%eax) \n\t"
553 "addl $2, %%eax \n\t" 552 "addl $8, %%eax \n\t"
554 "cmpl %2, %%eax \n\t" 553 "cmpl %2, %%eax \n\t"
555 " jb 1b \n\t" 554 " jb 1b \n\t"
556 :: "r" (src), "r"(dst), "r" (src_size) 555 :: "r" (src), "r"(dst), "r" (src_size)
557 : "%eax" 556 : "%eax"
558 ); 557 );
618 ); 617 );
619 618
620 __asm __volatile(SFENCE:::"memory"); 619 __asm __volatile(SFENCE:::"memory");
621 __asm __volatile(EMMS:::"memory"); 620 __asm __volatile(EMMS:::"memory");
622 621
623 if(!mmx_size) return; //finihsed, was multiple of 8 622 if(mmx_size==23) return; //finihsed, was multiple of 8
624
625 src+= src_size; 623 src+= src_size;
626 dst+= src_size; 624 dst+= src_size;
627 src_size= 24-mmx_size; 625 src_size= 23 - mmx_size;
628 src-= src_size; 626 src-= src_size;
629 dst-= src_size; 627 dst-= src_size;
630 #endif 628 #endif
631 for(i=0; i<src_size; i+=3) 629 for(i=0; i<src_size; i+=3)
632 { 630 {