Mercurial > mplayer.hg
comparison postproc/rgb2rgb_template.c @ 6096:f38c7228a094
fixing end overwrite bugs (some at least)
this needs testing; untested parts should be reverted before release
I tested 24->32, 15->16, bgr24->rgb, bgr32->rgb (retesting them isn't a bad idea either ...)
author | michael |
---|---|
date | Wed, 15 May 2002 02:27:41 +0000 |
parents | f4f3cfcd0d64 |
children | c5cf988c6d6f |
comparison
equal
deleted
inserted
replaced
6095:891cff8aba60 | 6096:f38c7228a094 |
---|---|
54 { | 54 { |
55 uint8_t *dest = dst; | 55 uint8_t *dest = dst; |
56 const uint8_t *s = src; | 56 const uint8_t *s = src; |
57 const uint8_t *end; | 57 const uint8_t *end; |
58 #ifdef HAVE_MMX | 58 #ifdef HAVE_MMX |
59 uint8_t *mm_end; | 59 const uint8_t *mm_end; |
60 #endif | 60 #endif |
61 end = s + src_size; | 61 end = s + src_size; |
62 #ifdef HAVE_MMX | 62 #ifdef HAVE_MMX |
63 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 63 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
64 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*4))*(MMREG_SIZE*4)); | 64 mm_end = end - 23; |
65 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); | 65 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); |
66 if(mm_end == end) mm_end -= MMREG_SIZE*4; | |
67 while(s < mm_end) | 66 while(s < mm_end) |
68 { | 67 { |
69 __asm __volatile( | 68 __asm __volatile( |
70 PREFETCH" 32%1\n\t" | 69 PREFETCH" 32%1\n\t" |
71 "movd %1, %%mm0\n\t" | 70 "movd %1, %%mm0\n\t" |
106 { | 105 { |
107 uint8_t *dest = dst; | 106 uint8_t *dest = dst; |
108 const uint8_t *s = src; | 107 const uint8_t *s = src; |
109 const uint8_t *end; | 108 const uint8_t *end; |
110 #ifdef HAVE_MMX | 109 #ifdef HAVE_MMX |
111 uint8_t *mm_end; | 110 const uint8_t *mm_end; |
112 #endif | 111 #endif |
113 end = s + src_size; | 112 end = s + src_size; |
114 #ifdef HAVE_MMX | 113 #ifdef HAVE_MMX |
115 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | 114 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); |
116 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*4))*(MMREG_SIZE*4)); | 115 mm_end = end - 31; |
117 while(s < mm_end) | 116 while(s < mm_end) |
118 { | 117 { |
119 __asm __volatile( | 118 __asm __volatile( |
120 PREFETCH" 32%1\n\t" | 119 PREFETCH" 32%1\n\t" |
121 "movq %1, %%mm0\n\t" | 120 "movq %1, %%mm0\n\t" |
186 32bit c version, and and&add trick by Michael Niedermayer | 185 32bit c version, and and&add trick by Michael Niedermayer |
187 */ | 186 */ |
188 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) | 187 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) |
189 { | 188 { |
190 #ifdef HAVE_MMX | 189 #ifdef HAVE_MMX |
191 register const char* s=src+src_size; | 190 register int offs=15-src_size; |
192 register char* d=dst+src_size; | 191 register const char* s=src-offs; |
193 register int offs=-src_size; | 192 register char* d=dst-offs; |
194 __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); | 193 __asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); |
195 __asm __volatile( | 194 __asm __volatile( |
196 "movq %0, %%mm4\n\t" | 195 "movq %0, %%mm4\n\t" |
197 ::"m"(mask15s)); | 196 ::"m"(mask15s)); |
198 while(offs<0) | 197 while(offs<0) |
250 #ifdef HAVE_MMX | 249 #ifdef HAVE_MMX |
251 const uint8_t *s = src; | 250 const uint8_t *s = src; |
252 const uint8_t *end,*mm_end; | 251 const uint8_t *end,*mm_end; |
253 uint16_t *d = (uint16_t *)dst; | 252 uint16_t *d = (uint16_t *)dst; |
254 end = s + src_size; | 253 end = s + src_size; |
255 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); | 254 mm_end = end - 15; |
256 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 255 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
257 __asm __volatile( | 256 __asm __volatile( |
258 "movq %0, %%mm7\n\t" | 257 "movq %0, %%mm7\n\t" |
259 "movq %1, %%mm6\n\t" | 258 "movq %1, %%mm6\n\t" |
260 ::"m"(red_16mask),"m"(green_16mask)); | 259 ::"m"(red_16mask),"m"(green_16mask)); |
296 while(s < end) | 295 while(s < end) |
297 { | 296 { |
298 const int b= *s++; | 297 const int b= *s++; |
299 const int g= *s++; | 298 const int g= *s++; |
300 const int r= *s++; | 299 const int r= *s++; |
300 s++; | |
301 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 301 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
302 } | 302 } |
303 __asm __volatile(SFENCE:::"memory"); | 303 __asm __volatile(SFENCE:::"memory"); |
304 __asm __volatile(EMMS:::"memory"); | 304 __asm __volatile(EMMS:::"memory"); |
305 #else | 305 #else |
321 #ifdef HAVE_MMX | 321 #ifdef HAVE_MMX |
322 const uint8_t *s = src; | 322 const uint8_t *s = src; |
323 const uint8_t *end,*mm_end; | 323 const uint8_t *end,*mm_end; |
324 uint16_t *d = (uint16_t *)dst; | 324 uint16_t *d = (uint16_t *)dst; |
325 end = s + src_size; | 325 end = s + src_size; |
326 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); | 326 mm_end = end - 15; |
327 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 327 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
328 __asm __volatile( | 328 __asm __volatile( |
329 "movq %0, %%mm7\n\t" | 329 "movq %0, %%mm7\n\t" |
330 "movq %1, %%mm6\n\t" | 330 "movq %1, %%mm6\n\t" |
331 ::"m"(red_15mask),"m"(green_15mask)); | 331 ::"m"(red_15mask),"m"(green_15mask)); |
367 while(s < end) | 367 while(s < end) |
368 { | 368 { |
369 const int b= *s++; | 369 const int b= *s++; |
370 const int g= *s++; | 370 const int g= *s++; |
371 const int r= *s++; | 371 const int r= *s++; |
372 s++; | |
372 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 373 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
373 } | 374 } |
374 __asm __volatile(SFENCE:::"memory"); | 375 __asm __volatile(SFENCE:::"memory"); |
375 __asm __volatile(EMMS:::"memory"); | 376 __asm __volatile(EMMS:::"memory"); |
376 #else | 377 #else |
392 #ifdef HAVE_MMX | 393 #ifdef HAVE_MMX |
393 const uint8_t *s = src; | 394 const uint8_t *s = src; |
394 const uint8_t *end,*mm_end; | 395 const uint8_t *end,*mm_end; |
395 uint16_t *d = (uint16_t *)dst; | 396 uint16_t *d = (uint16_t *)dst; |
396 end = s + src_size; | 397 end = s + src_size; |
397 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); | 398 mm_end = end - 11; |
398 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 399 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
399 __asm __volatile( | 400 __asm __volatile( |
400 "movq %0, %%mm7\n\t" | 401 "movq %0, %%mm7\n\t" |
401 "movq %1, %%mm6\n\t" | 402 "movq %1, %%mm6\n\t" |
402 ::"m"(red_16mask),"m"(green_16mask)); | 403 ::"m"(red_16mask),"m"(green_16mask)); |
403 if(mm_end == end) mm_end -= MMREG_SIZE*2; | |
404 while(s < mm_end) | 404 while(s < mm_end) |
405 { | 405 { |
406 __asm __volatile( | 406 __asm __volatile( |
407 PREFETCH" 32%1\n\t" | 407 PREFETCH" 32%1\n\t" |
408 "movd %1, %%mm0\n\t" | 408 "movd %1, %%mm0\n\t" |
464 #ifdef HAVE_MMX | 464 #ifdef HAVE_MMX |
465 const uint8_t *s = src; | 465 const uint8_t *s = src; |
466 const uint8_t *end,*mm_end; | 466 const uint8_t *end,*mm_end; |
467 uint16_t *d = (uint16_t *)dst; | 467 uint16_t *d = (uint16_t *)dst; |
468 end = s + src_size; | 468 end = s + src_size; |
469 mm_end = (uint8_t*)((((unsigned long)end)/(MMREG_SIZE*2))*(MMREG_SIZE*2)); | 469 mm_end = end -11; |
470 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | 470 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); |
471 __asm __volatile( | 471 __asm __volatile( |
472 "movq %0, %%mm7\n\t" | 472 "movq %0, %%mm7\n\t" |
473 "movq %1, %%mm6\n\t" | 473 "movq %1, %%mm6\n\t" |
474 ::"m"(red_15mask),"m"(green_15mask)); | 474 ::"m"(red_15mask),"m"(green_15mask)); |
475 if(mm_end == end) mm_end -= MMREG_SIZE*2; | |
476 while(s < mm_end) | 475 while(s < mm_end) |
477 { | 476 { |
478 __asm __volatile( | 477 __asm __volatile( |
479 PREFETCH" 32%1\n\t" | 478 PREFETCH" 32%1\n\t" |
480 "movd %1, %%mm0\n\t" | 479 "movd %1, %%mm0\n\t" |
548 "pand "MANGLE(mask32g)", %%mm2 \n\t" | 547 "pand "MANGLE(mask32g)", %%mm2 \n\t" |
549 "pand "MANGLE(mask32b)", %%mm1 \n\t" | 548 "pand "MANGLE(mask32b)", %%mm1 \n\t" |
550 "por %%mm0, %%mm2 \n\t" | 549 "por %%mm0, %%mm2 \n\t" |
551 "por %%mm1, %%mm2 \n\t" | 550 "por %%mm1, %%mm2 \n\t" |
552 MOVNTQ" %%mm2, (%1, %%eax) \n\t" | 551 MOVNTQ" %%mm2, (%1, %%eax) \n\t" |
553 "addl $2, %%eax \n\t" | 552 "addl $8, %%eax \n\t" |
554 "cmpl %2, %%eax \n\t" | 553 "cmpl %2, %%eax \n\t" |
555 " jb 1b \n\t" | 554 " jb 1b \n\t" |
556 :: "r" (src), "r"(dst), "r" (src_size) | 555 :: "r" (src), "r"(dst), "r" (src_size) |
557 : "%eax" | 556 : "%eax" |
558 ); | 557 ); |
618 ); | 617 ); |
619 | 618 |
620 __asm __volatile(SFENCE:::"memory"); | 619 __asm __volatile(SFENCE:::"memory"); |
621 __asm __volatile(EMMS:::"memory"); | 620 __asm __volatile(EMMS:::"memory"); |
622 | 621 |
623 if(!mmx_size) return; //finihsed, was multiple of 8 | 622 if(mmx_size==23) return; //finihsed, was multiple of 8 |
624 | |
625 src+= src_size; | 623 src+= src_size; |
626 dst+= src_size; | 624 dst+= src_size; |
627 src_size= 24-mmx_size; | 625 src_size= 23 - mmx_size; |
628 src-= src_size; | 626 src-= src_size; |
629 dst-= src_size; | 627 dst-= src_size; |
630 #endif | 628 #endif |
631 for(i=0; i<src_size; i+=3) | 629 for(i=0; i<src_size; i+=3) |
632 { | 630 { |