mplayer.hg: postproc/rgb2rgb.c annotate

annotate postproc/rgb2rgb.c @ 2717:5fa8c079ee3c

fix small xshape bug

author	pontscho
date	Mon, 05 Nov 2001 17:00:42 +0000
parents	84dff4aac89e
children	9c5e64493742

rev	line source
2694 2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	1 /*
2538 71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	2 *
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	3 * rgb2rgb.c, Software RGB to RGB convertor
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	4 * Written by Nick Kurshev.
2702 440312d953a8 yv12toyuy2 in MMX michael parents: 2701 diff changeset	5 * palette stuff & yuv stuff by Michael
2538 71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	6 */
2504 13e1c5ab417a vo_vesa: rgb2rgb support nick parents: diff changeset	7 #include <inttypes.h>
13e1c5ab417a vo_vesa: rgb2rgb support nick parents: diff changeset	8 #include "../config.h"
13e1c5ab417a vo_vesa: rgb2rgb support nick parents: diff changeset	9 #include "rgb2rgb.h"
2538 71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	10 #include "../mmx_defs.h"
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	11
#ifdef HAVE_MMX
/*
 * 64-bit constants that the MMX code paths load into MMX registers.
 * They are 8-byte aligned so a single movq can fetch them.
 */
/* keeps the low 3 bytes of each 32-bit half (clears every 4th byte) */
static const uint64_t mask32   __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
/* selects bytes 0..2 of a quadword */
static const uint64_t mask24l  __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
/* selects bytes 3..5 of a quadword */
static const uint64_t mask24h  __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
static const uint64_t mask15b  __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
/* every bit above the low 5 of each 16-bit word; loaded into mm4 by rgb15to16 */
static const uint64_t mask15s  __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
#endif
2513 d3c6b9ae24b1 sfence nick parents: 2512 diff changeset	20
/**
 * Expand packed 24-bit pixels to 32-bit pixels.
 *
 * The three bytes of every source pixel are copied unchanged into the first
 * three bytes of a four byte destination pixel; the fourth byte is set to 0.
 *
 * @param src      packed 24-bit input
 * @param dst      output buffer, receives 4 bytes for every converted pixel
 * @param src_size size of the input in BYTES; a trailing partial pixel
 *                 (src_size not a multiple of 3) is ignored
 */
void rgb24to32(const uint8_t *src, uint8_t *dst, uint32_t src_size)
{
	uint8_t *dest = dst;
	const uint8_t *s = src;
	const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
	__asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
	/*
	 * 4 pixels (12 input bytes) per iteration. The last movd loads 4 bytes
	 * starting at offset 9, so 13 input bytes are touched: only iterate
	 * while more than 12 bytes remain and leave the rest to the C loop.
	 */
	while(end - s > 12)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movd %1, %%mm0\n\t"
			"movd 3%1, %%mm1\n\t"
			"movd 6%1, %%mm2\n\t"
			"movd 9%1, %%mm3\n\t"
			"punpckldq %%mm1, %%mm0\n\t"
			"punpckldq %%mm3, %%mm2\n\t"
			"pand %%mm7, %%mm0\n\t"	/* clear the 4th byte of each pixel */
			"pand %%mm7, %%mm2\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm2, 8%0"
			:"=m"(*dest)
			:"m"(*s)
			:"memory");
		dest += 16;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path / tail: whole pixels only, never read or write past the end */
	while(end - s >= 3)
	{
		*dest++ = *s++;
		*dest++ = *s++;
		*dest++ = *s++;
		*dest++ = 0;
	}
}
2505 2aaa11d22f91 vo_vesa: more rgb2rgb support nick parents: 2504 diff changeset	66
/**
 * Pack 32-bit pixels into 24-bit pixels.
 *
 * The first three bytes of every 4-byte source pixel are copied unchanged,
 * the fourth byte is dropped.
 *
 * @param src      32-bit input
 * @param dst      output buffer, receives 3 bytes for every converted pixel
 * @param src_size size of the input in BYTES; a trailing partial pixel
 *                 (src_size not a multiple of 4) is ignored
 */
void rgb32to24(const uint8_t *src, uint8_t *dst, uint32_t src_size)
{
	uint8_t *dest = dst;
	const uint8_t *s = src;
	const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
	__asm __volatile(
		"movq %0, %%mm7\n\t"
		"movq %1, %%mm6"
		::"m"(mask24l),"m"(mask24h):"memory");
	/*
	 * 4 pixels (16 input bytes) per iteration. The second MOVNTQ stores
	 * 8 bytes at offset 6, i.e. 2 bytes beyond the 12 bytes produced.
	 * Keep at least one whole pixel for the C loop below so that those
	 * 2 bytes are always overwritten with real data and never land
	 * beyond the converted output.
	 */
	while(end - s >= 20)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm1\n\t"
			"movq %%mm0, %%mm2\n\t"
			"movq %%mm1, %%mm3\n\t"
			"psrlq $8, %%mm2\n\t"
			"psrlq $8, %%mm3\n\t"
			"pand %%mm7, %%mm0\n\t"	/* first pixel  -> bytes 0..2 */
			"pand %%mm7, %%mm1\n\t"
			"pand %%mm6, %%mm2\n\t"	/* second pixel -> bytes 3..5 */
			"pand %%mm6, %%mm3\n\t"
			"por %%mm2, %%mm0\n\t"
			"por %%mm3, %%mm1\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm1, 6%0"
			:"=m"(*dest)
			:"m"(*s)
			:"memory");
		dest += 12;
		s += 16;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* scalar path / tail: whole pixels only, never read or write past the end */
	while(end - s >= 4)
	{
		*dest++ = *s++;
		*dest++ = *s++;
		*dest++ = *s++;
		s++;	/* skip the 4th byte */
	}
}
2506 501752469c39 vo_vesa: more rgb2rgb support nick parents: 2505 diff changeset	119
2538 71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	120 /*
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	121 Original by Strepto/Astral
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	122 ported to gcc & bugfixed : A'rpi
2564 3d04a0991dce cosmetic nick parents: 2538 diff changeset	123 MMX2, 3DNOW optimization by Nick Kurshev
2698 22652c028692 faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster michael parents: 2697 diff changeset	124 32bit c version, and and&add trick by Michael Niedermayer
2538 71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile nick parents: 2535 diff changeset	125 */
/**
 * Convert 15-bit pixels to 16-bit pixels.
 *
 * For every 16-bit word x the result is (x & 0x7FFF) + (x & 0x7FE0): the low
 * 5 bits stay in place, bits 5..14 are moved up by one position and the
 * most significant input bit is discarded.
 *
 * Pixels are read and written as native 16/32-bit words through casted
 * pointers, so src and dst are presumably expected to be suitably aligned
 * on targets that require it -- TODO confirm against callers.
 *
 * @param src      15-bit input
 * @param dst      16-bit output, same size as the input
 * @param src_size size of the input in BYTES; an odd trailing byte is ignored
 */
void rgb15to16(const uint8_t *src, uint8_t *dst, uint32_t src_size)
{
	const uint8_t *s = src;
	uint8_t *d = dst;
	const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*s));
	__asm __volatile(
		"movq %0, %%mm4\n\t"
		::"m"(mask15s));
	/* 8 pixels (16 bytes) per iteration; only while a full group remains */
	while(end - s >= 16)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm2\n\t"
			"movq %%mm0, %%mm1\n\t"
			"movq %%mm2, %%mm3\n\t"
			"pand %%mm4, %%mm0\n\t"
			"pand %%mm4, %%mm2\n\t"
			"paddw %%mm1, %%mm0\n\t"	/* x + (x & 0xFFE0), wraps per word */
			"paddw %%mm3, %%mm2\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm2, 8%0"
			:"=m"(*d)
			:"m"(*s)
			:"memory");	/* the asm touches 16 bytes, not just *s / *d */
		d += 16;
		s += 16;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* two pixels at a time; unsigned arithmetic so the add cannot overflow */
	while(end - s >= 4)
	{
		const uint32_t x = *(const uint32_t *)s;
		*(uint32_t *)d = (x & 0x7FFF7FFFU) + (x & 0x7FE07FE0U);
		s += 4;
		d += 4;
	}
	/* odd pixel count: convert the last single pixel */
	if(end - s >= 2)
	{
		const unsigned x = *(const uint16_t *)s;
		*(uint16_t *)d = (uint16_t)((x & 0x7FFFU) + (x & 0x7FE0U));
	}
}
2694 2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	185
/**
 * Expand 8-bit palettized pixels to 32-bit pixels.
 *
 * Palette is assumed to contain bgr32: entry n occupies the four bytes
 * starting at palette[4*n] and is copied to the output as one 32-bit word.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, receives 4 bytes per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    table of 32-bit entries indexed by the source bytes
 */
void palette8torgb32(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	const uint32_t *pal32 = (const uint32_t *)palette;
	uint32_t *out = (uint32_t *)dst;
	uint32_t i;
	for(i=0; i<num_pixels; i++)
		out[i] = pal32[ src[i] ];
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	195
/**
 * Expand 8-bit palettized pixels to 24-bit pixels.
 *
 * Palette is assumed to contain bgr32: entry n occupies the four bytes
 * starting at palette[4*n]; its first three bytes are copied to the output
 * and the fourth one is dropped.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, receives 3 bytes per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    table of 4-byte entries indexed by the source bytes
 */
void palette8torgb24(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	uint32_t i;
	/*
	 * Copy byte by byte: storing the whole 32-bit entry at dst + 3*i would
	 * write 1 byte too much and might cause alignment issues on some
	 * architectures.
	 */
	for(i=0; i<num_pixels; i++)
	{
		//FIXME slow?
		const uint8_t *entry = palette + src[i]*4;
		dst[0]= entry[0];
		dst[1]= entry[1];
		dst[2]= entry[2];
		dst+= 3;
	}
}
1eaf3f89e49f palette to bgr24 michael parents: 2694 diff changeset	216
/**
 * Convert 32-bit pixels to 16-bit (5-6-5) pixels.
 *
 * Source byte 0 ends up in bits 0..4, byte 1 in bits 5..10 and byte 2 in
 * bits 11..15 of the output word; source byte 3 is ignored.
 *
 * @param src        input, 4 bytes per pixel
 * @param dst        output, one native 16-bit word per pixel
 * @param num_pixels number of pixels to convert
 */
void rgb32to16(const uint8_t *src, uint8_t *dst, uint32_t num_pixels)
{
	uint16_t *out = (uint16_t *)dst;
	uint32_t i;
	/* i counts pixels: the input advances 4 bytes, the output one word */
	for(i=0; i<num_pixels; i++)
	{
		const int b= src[0];
		const int g= src[1];
		const int r= src[2];

		out[i]= (uint16_t)((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8));
		src+= 4;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	229
/**
 * Convert 32-bit pixels to 15-bit (5-5-5) pixels.
 *
 * Source byte 0 ends up in bits 0..4, byte 1 in bits 5..9 and byte 2 in
 * bits 10..14 of the output word; source byte 3 is ignored and bit 15 of
 * the output is 0.
 *
 * @param src        input, 4 bytes per pixel
 * @param dst        output, one native 16-bit word per pixel
 * @param num_pixels number of pixels to convert
 */
void rgb32to15(const uint8_t *src, uint8_t *dst, uint32_t num_pixels)
{
	uint16_t *out = (uint16_t *)dst;
	uint32_t i;
	/* i counts pixels: the input advances 4 bytes, the output one word */
	for(i=0; i<num_pixels; i++)
	{
		const int b= src[0];
		const int g= src[1];
		const int r= src[2];

		/* green is 5 bits wide here, so it is shifted by 2 (not 3) to sit below red */
		out[i]= (uint16_t)((b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7));
		src+= 4;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	242
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	243
/**
 * Expand 8-bit palettized pixels to 16-bit pixels.
 *
 * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette.
 * Entry n is the native 16-bit word at index n of the palette.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, receives one 16-bit word per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    table of 16-bit entries indexed by the source bytes
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	const uint16_t *pal16 = (const uint16_t *)palette;
	uint16_t *out = (uint16_t *)dst;
	uint32_t i;
	for(i=0; i<num_pixels; i++)
		out[i] = pal16[ src[i] ];
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff) michael parents: 2677 diff changeset	253
/**
 * Expand 8-bit palettized pixels to 15-bit pixels.
 *
 * Palette is assumed to contain bgr15, see rgb32to15 to convert the palette.
 * Entry n is the native 16-bit word at index n of the palette.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, receives one 16-bit word per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    table of 16-bit entries indexed by the source bytes
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	const uint16_t *pal15 = (const uint16_t *)palette;
	uint16_t *out = (uint16_t *)dst;
	uint32_t i;
	for(i=0; i<num_pixels; i++)
		out[i] = pal15[ src[i] ];
}
/**
 * Interleave separate Y, U and V sample arrays into packed Y0 U Y1 V groups.
 *
 * Every pair of pixels consumes two bytes of ysrc and one byte each of usrc
 * and vsrc, and produces four bytes of dst.
 *
 * num_pixels must be a multiple of 16 for the MMX version: that loop handles
 * 16 pixels per iteration and tests its counter only after the body has run.
 * The C version converts num_pixels/2 pairs; an odd trailing pixel is ignored.
 *
 * @param ysrc       luma samples, one per pixel
 * @param usrc       U samples, one per pixel pair
 * @param vsrc       V samples, one per pixel pair
 * @param dst        receives 2*num_pixels bytes of packed output
 * @param num_pixels number of pixels to convert
 */
void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, uint32_t num_pixels)
{
#ifdef HAVE_MMX
	/* %eax counts pixel pairs and advances by 8 per iteration. */
	asm volatile(
		"xorl %%eax, %%eax \n\t"
		"1: \n\t"
		PREFETCH" 32(%1, %%eax, 2) \n\t"
		PREFETCH" 32(%2, %%eax) \n\t"
		PREFETCH" 32(%3, %%eax) \n\t"
		"movq (%2, %%eax), %%mm0 \n\t" // U(0)
		"movq %%mm0, %%mm2 \n\t" // U(0)
		"movq (%3, %%eax), %%mm1 \n\t" // V(0)
		"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
		"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)

		"movq (%1, %%eax,2), %%mm3 \n\t" // Y(0)
		"movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8)
		"movq %%mm3, %%mm4 \n\t" // Y(0)
		"movq %%mm5, %%mm6 \n\t" // Y(8)
		"punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
		"punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
		"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
		"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)

		MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t"
		MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t"
		MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
		MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"

		"addl $8, %%eax \n\t"
		"cmpl %4, %%eax \n\t"
		" jb 1b \n\t"
		EMMS" \n\t"
		SFENCE
		::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (num_pixels>>1)
		: "memory", "%eax"
	);

#else
	/* One iteration per pixel pair; the index is unsigned to match the count. */
	const uint32_t num_pairs = num_pixels>>1;
	uint32_t i;
	for(i=0; i<num_pairs; i++)
	{
		dst[4*i+0] = ysrc[2*i+0];
		dst[4*i+1] = usrc[i];
		dst[4*i+2] = ysrc[2*i+1];
		dst[4*i+3] = vsrc[i];
	}
#endif
}
9b47bc409083 yv12 <-> yuy2 in C michael parents: 2698 diff changeset	317
/**
 * Split packed Y0 U Y1 V groups into separate Y, U and V sample arrays.
 *
 * Every four bytes of src yield two bytes of ydst and one byte each of udst
 * and vdst.
 *
 * The MMX loop handles 16 pixels per iteration and tests its counter only
 * after the body has run, so num_pixels should be a multiple of 16 there.
 * The C version converts num_pixels/2 pairs; an odd trailing pixel is ignored.
 *
 * @param src        packed input, 2*num_pixels bytes
 * @param ydst       receives one luma sample per pixel
 * @param udst       receives one U sample per pixel pair
 * @param vdst       receives one V sample per pixel pair
 * @param num_pixels number of pixels to convert
 */
void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, uint32_t num_pixels)
{
#ifdef HAVE_MMX
	/* %eax counts pixel pairs and advances by 8 per iteration;
	 * mm7 holds the low-byte-of-each-word mask. */
	asm volatile(
		"xorl %%eax, %%eax \n\t"
		"pcmpeqw %%mm7, %%mm7 \n\t"
		"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
		"1: \n\t"
		PREFETCH" 64(%0, %%eax, 4) \n\t"
		"movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
		"movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4)
		"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
		"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
		"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
		"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
		"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
		"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
		"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
		"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)

		MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"

		"movq 16(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(8)
		"movq 24(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(12)
		"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
		"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
		"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
		"psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
		"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
		"pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
		"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
		"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)

		MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"

		"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
		"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
		"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
		"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
		"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
		"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
		"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
		"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)

		MOVNTQ" %%mm0, (%3, %%eax) \n\t"
		MOVNTQ" %%mm2, (%2, %%eax) \n\t"

		"addl $8, %%eax \n\t"
		"cmpl %4, %%eax \n\t"
		" jb 1b \n\t"
		EMMS" \n\t"
		SFENCE
		::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (num_pixels>>1)
		: "memory", "%eax"
	);
#else
	/* One iteration per pixel pair; the index is unsigned to match the count. */
	const uint32_t num_pairs = num_pixels>>1;
	uint32_t i;
	for(i=0; i<num_pairs; i++)
	{
		ydst[2*i+0] = src[4*i+0];
		udst[i] = src[4*i+1];
		ydst[2*i+1] = src[4*i+2];
		vdst[i] = src[4*i+3];
	}
#endif
}

Mercurial > mplayer.hg

annotate postproc/rgb2rgb.c @ 2717:5fa8c079ee3c