# HG changeset patch # User michael # Date 1018658901 0 # Node ID 21bd4b32abb4d95c62c8a1be3d87831127d95281 # Parent 1f231091fb5459fa3c97e09dab1b49c734fe615b rgb24->bgr24 diff -r 1f231091fb54 -r 21bd4b32abb4 postproc/rgb2rgb.c --- a/postproc/rgb2rgb.c Sat Apr 13 00:47:54 2002 +0000 +++ b/postproc/rgb2rgb.c Sat Apr 13 00:48:21 2002 +0000 @@ -24,6 +24,9 @@ static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL; +static const uint64_t mask24b __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL; +static const uint64_t mask24g __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL; +static const uint64_t mask24r __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL; static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL; static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL; static const uint64_t mask24hh __attribute__((aligned(8))) = 0xffff000000000000ULL; @@ -316,6 +319,23 @@ #endif } +void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) +{ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fastest first + if(gCpuCaps.hasMMX2) + rgb24tobgr24_MMX2(src, dst, src_size); + else if(gCpuCaps.has3DNow) + rgb24tobgr24_3DNow(src, dst, src_size); + else if(gCpuCaps.hasMMX) + rgb24tobgr24_MMX(src, dst, src_size); + else + rgb24tobgr24_C(src, dst, src_size); +#else + rgb24tobgr24_C(src, dst, src_size); +#endif +} + /** * * height should be a multiple of 2 and width should be a multiple of 16 (if this is a diff -r 1f231091fb54 -r 21bd4b32abb4 postproc/rgb2rgb.h --- a/postproc/rgb2rgb.h Sat Apr 13 00:47:54 2002 +0000 +++ b/postproc/rgb2rgb.h Sat Apr 13 00:48:21 2002 +0000 @@ -17,6 +17,7 @@ extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb24to15(const uint8_t *src,uint8_t 
*dst,unsigned src_size); extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); +extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); extern void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); diff -r 1f231091fb54 -r 21bd4b32abb4 postproc/rgb2rgb_template.c --- a/postproc/rgb2rgb_template.c Sat Apr 13 00:47:54 2002 +0000 +++ b/postproc/rgb2rgb_template.c Sat Apr 13 00:48:21 2002 +0000 @@ -571,6 +571,73 @@ #endif } +static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) +{ + int i; +#ifdef HAVE_MMX + int mmx_size= 23 - src_size; + asm volatile ( + "movq "MANGLE(mask24r)", %%mm5 \n\t" + "movq "MANGLE(mask24g)", %%mm6 \n\t" + "movq "MANGLE(mask24b)", %%mm7 \n\t" + ".balign 16 \n\t" + "1: \n\t" + PREFETCH" 32(%1, %%eax) \n\t" + "movq (%1, %%eax), %%mm0 \n\t" // BGR BGR BG + "movq (%1, %%eax), %%mm1 \n\t" // BGR BGR BG + "movq 2(%1, %%eax), %%mm2 \n\t" // R BGR BGR B + "psllq $16, %%mm0 \n\t" // 00 BGR BGR + "pand %%mm5, %%mm0 \n\t" + "pand %%mm6, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "por %%mm0, %%mm1 \n\t" + "por %%mm2, %%mm1 \n\t" + "movq 6(%1, %%eax), %%mm0 \n\t" // BGR BGR BG + MOVNTQ" %%mm1, (%2, %%eax) \n\t" // RGB RGB RG + "movq 8(%1, %%eax), %%mm1 \n\t" // R BGR BGR B + "movq 10(%1, %%eax), %%mm2 \n\t" // GR BGR BGR + "pand %%mm7, %%mm0 \n\t" + "pand %%mm5, %%mm1 \n\t" + "pand %%mm6, %%mm2 \n\t" + "por %%mm0, %%mm1 \n\t" + "por %%mm2, %%mm1 \n\t" + "movq 14(%1, %%eax), %%mm0 \n\t" // R BGR BGR B + MOVNTQ" %%mm1, 8(%2, %%eax) \n\t" // B RGB RGB R + "movq 16(%1, %%eax), %%mm1 \n\t" // GR BGR BGR + "movq 18(%1, %%eax), %%mm2 \n\t" // BGR BGR BG + "pand %%mm6, %%mm0 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm5, %%mm2 \n\t" + "por %%mm0, %%mm1 \n\t" + "por %%mm2, %%mm1 \n\t" + MOVNTQ" %%mm1, 16(%2, %%eax) \n\t" + "addl $24, %%eax \n\t" + " js 1b \n\t" + : "+a" (mmx_size) + : "r" (src-mmx_size), "r"(dst-mmx_size) + ); + + 
__asm __volatile(SFENCE:::"memory"); + __asm __volatile(EMMS:::"memory"); + + if(!mmx_size) return; //finished, was multiple of 8 + + src+= src_size; + dst+= src_size; + src_size= 24-mmx_size; + src-= src_size; + dst-= src_size; +#endif + for(i=0; i