annotate postproc/rgb2rgb.c @ 2717:5fa8c079ee3c

fix small xshape bug
author pontscho
date Mon, 05 Nov 2001 17:00:42 +0000
parents 84dff4aac89e
children 9c5e64493742
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
1 /*
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
2 *
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
3 * rgb2rgb.c, Software RGB to RGB converter
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
4 * Written by Nick Kurshev.
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
5 * palette stuff & yuv stuff by Michael
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
6 */
2504
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
7 #include <inttypes.h>
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
8 #include "../config.h"
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
9 #include "rgb2rgb.h"
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
10 #include "../mmx_defs.h"
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
11
2535
b44113f46c96 cant compile on non x86 bugfix
michael
parents: 2517
diff changeset
12 #ifdef HAVE_MMX
/*
 * 64-bit, 8-byte aligned constants used as memory operands by the MMX code
 * paths in this file (only compiled when HAVE_MMX is defined):
 *   mask32   - 0x00FFFFFF in each 32-bit half: clears the top byte of both
 *              halves; loaded into mm7 by rgb24to32.
 *   mask24l  - low 24 bits of the quadword; loaded into mm7 by rgb32to24.
 *   mask24h  - bits 24..47 of the quadword; loaded into mm6 by rgb32to24.
 *   mask15b  - 0x001F in each 16-bit word (low 5 bits).
 *   mask15rg - 0x7FE0 in each 16-bit word (bits 5..14).
 *   mask15s  - 0xFFE0 in each 16-bit word (bits 5..15); loaded into mm4 by
 *              rgb15to16, which ANDs a copy of each word with it and adds
 *              the result back to the word.
 * NOTE(review): mask15b and mask15rg are not referenced by the functions
 * reviewed here; check the rest of the file before removing them.
 */
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
13 static const uint64_t mask32 __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
14 static const uint64_t mask24l __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
15 static const uint64_t mask24h __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
16 static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
17 static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
2698
22652c028692 faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents: 2697
diff changeset
18 static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
2535
b44113f46c96 cant compile on non x86 bugfix
michael
parents: 2517
diff changeset
19 #endif
2513
nick
parents: 2512
diff changeset
20
2677
794dec2fae64 using const modifier
nick
parents: 2564
diff changeset
/**
 * Expand packed 24-bit pixels to 32-bit pixels.
 *
 * The three bytes of every source pixel are copied unchanged into the first
 * three bytes of a four-byte destination pixel; the fourth byte is set to 0.
 *
 * @param src      packed 3-byte pixels
 * @param dst      output buffer; 4 bytes are written per whole source pixel
 * @param src_size size of src in bytes; a trailing partial pixel
 *                 (src_size % 3 bytes) is ignored
 */
void rgb24to32(const uint8_t *src,uint8_t *dst,uint32_t src_size)
{
	uint8_t *dest = dst;
	const uint8_t *s = src;
	const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
	/* PREFETCH, MOVNTQ, SFENCE and EMMS are macros defined outside this
	   function (presumably in ../mmx_defs.h). */
	__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
	__asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
	/*
	 * One iteration converts 4 pixels: 12 source bytes become 16
	 * destination bytes. The last movd loads 4 bytes at offset 9, so 13
	 * source bytes must be readable. The loop is bounded by the number of
	 * bytes left rather than by an address rounded down to a multiple of
	 * 16, so it never reads or converts data beyond end, whatever the
	 * alignment of src is.
	 */
	while(end - s >= 13)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movd %1, %%mm0\n\t"
			"movd 3%1, %%mm1\n\t"
			"movd 6%1, %%mm2\n\t"
			"movd 9%1, %%mm3\n\t"
			"punpckldq %%mm1, %%mm0\n\t"
			"punpckldq %%mm3, %%mm2\n\t"
			"pand %%mm7, %%mm0\n\t"	/* clear the 4th byte of each pixel */
			"pand %%mm7, %%mm2\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm2, 8%0"
			:"=m"(*dest)
			:"m"(*s)
			:"memory");
		dest += 16;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Remaining whole pixels (all of them when HAVE_MMX is not defined). */
	while(end - s >= 3)
	{
		*dest++ = *s++;
		*dest++ = *s++;
		*dest++ = *s++;
		*dest++ = 0;
	}
}
2505
2aaa11d22f91 vo_vesa: more rgb2rgb support
nick
parents: 2504
diff changeset
66
2677
794dec2fae64 using const modifier
nick
parents: 2564
diff changeset
/**
 * Pack 32-bit pixels into 24-bit pixels.
 *
 * The first three bytes of every four-byte source pixel are copied
 * unchanged; the fourth byte is dropped.
 *
 * @param src      4-byte pixels
 * @param dst      output buffer; 3 bytes are written per whole source pixel
 * @param src_size size of src in bytes; a trailing partial pixel
 *                 (src_size % 4 bytes) is ignored
 */
void rgb32to24(const uint8_t *src,uint8_t *dst,uint32_t src_size)
{
	uint8_t *dest = dst;
	const uint8_t *s = src;
	const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
	/* PREFETCH, MOVNTQ, SFENCE and EMMS are macros defined outside this
	   function (presumably in ../mmx_defs.h). */
	__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
	__asm __volatile(
		"movq %0, %%mm7\n\t"
		"movq %1, %%mm6"
		::"m"(mask24l),"m"(mask24h):"memory");
	/*
	 * One iteration reads 16 source bytes (4 pixels) and produces 12
	 * output bytes. It does so with two 8-byte stores at offsets 0 and 6,
	 * so 14 bytes are written and the last 2 are zero padding. At least
	 * one further whole pixel (16 + 4 = 20 bytes left) is therefore
	 * required: the scalar loop below then overwrites the padding, and the
	 * stores stay inside a dst buffer of exactly 3 bytes per pixel.
	 * Bounding on the remaining byte count also keeps the 16-byte loads
	 * inside src for any alignment of src.
	 */
	while(end - s >= 20)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm1\n\t"
			"movq %%mm0, %%mm2\n\t"
			"movq %%mm1, %%mm3\n\t"
			"psrlq $8, %%mm2\n\t"	/* move 2nd pixel down next to the 1st */
			"psrlq $8, %%mm3\n\t"
			"pand %%mm7, %%mm0\n\t"	/* 1st pixel, 3 bytes */
			"pand %%mm7, %%mm1\n\t"
			"pand %%mm6, %%mm2\n\t"	/* 2nd pixel, 3 bytes */
			"pand %%mm6, %%mm3\n\t"
			"por %%mm2, %%mm0\n\t"
			"por %%mm3, %%mm1\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm1, 6%0"
			:"=m"(*dest)
			:"m"(*s)
			:"memory");
		dest += 12;
		s += 16;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Remaining whole pixels (all of them when HAVE_MMX is not defined). */
	while(end - s >= 4)
	{
		*dest++ = *s++;
		*dest++ = *s++;
		*dest++ = *s++;
		s++;
	}
}
2506
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
119
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
120 /*
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
121 Original by Strepto/Astral
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
122 ported to gcc & bugfixed : A'rpi
2564
3d04a0991dce cosmetic
nick
parents: 2538
diff changeset
123 MMX2, 3DNOW optimization by Nick Kurshev
2698
22652c028692 faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents: 2697
diff changeset
124 32bit c version, and and&add trick by Michael Niedermayer
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
125 */
2677
794dec2fae64 using const modifier
nick
parents: 2564
diff changeset
/**
 * Convert 15-bit pixels (0rrrrrgggggbbbbb) to 16-bit pixels
 * (rrrrrggggggbbbbb).
 *
 * For every 16-bit word the low 5 bits are kept and bits 5..14 are moved up
 * by one position; bit 15 of the input is ignored. The shift is done with
 * the "and & add" trick: x + (x & 0x7FE0) doubles the upper field.
 * Words are read and written in the byte order of the host.
 *
 * @param src      15-bit pixels, 2 bytes each
 * @param dst      output buffer, same size as the converted part of src
 * @param src_size size of src in bytes; a trailing odd byte is ignored
 */
void rgb15to16(const uint8_t *src,uint8_t *dst,uint32_t src_size)
{
	const uint8_t *s = src;
	const uint8_t *end = src + src_size;
	uint8_t *d = dst;
#ifdef HAVE_MMX
	/* PREFETCH, MOVNTQ, SFENCE and EMMS are macros defined outside this
	   function (presumably in ../mmx_defs.h). */
	__asm __volatile(PREFETCH" %0"::"m"(*s));
	__asm __volatile(
		"movq %0, %%mm4\n\t"
		::"m"(mask15s));
	/* 8 pixels (16 bytes) per iteration; only whole 16-byte chunks are
	   handled here so that neither buffer is overrun when src_size is not
	   a multiple of 16. The rest is left to the scalar code below. */
	while(end - s >= 16)
	{
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq 8%1, %%mm2\n\t"
			"movq %%mm0, %%mm1\n\t"
			"movq %%mm2, %%mm3\n\t"
			"pand %%mm4, %%mm0\n\t"
			"pand %%mm4, %%mm2\n\t"
			"paddw %%mm1, %%mm0\n\t"
			"paddw %%mm3, %%mm2\n\t"
			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm2, 8%0"
			:"=m"(*d)
			:"m"(*s)
			/* the asm touches 16 bytes, not just the single byte
			   named by each operand */
			:"memory");
		d += 16;
		s += 16;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Two pixels at a time. The bytes are moved through a local word, so
	   src and dst need no particular alignment. Neither half can carry
	   into the other: 0x7FFF + 0x7FE0 < 0x10000. */
	while(end - s >= 4)
	{
		uint32_t x;
		uint8_t *raw = (uint8_t *)&x;

		raw[0] = s[0];
		raw[1] = s[1];
		raw[2] = s[2];
		raw[3] = s[3];
		x = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
		d[0] = raw[0];
		d[1] = raw[1];
		d[2] = raw[2];
		d[3] = raw[3];
		s += 4;
		d += 4;
	}
	/* A last single pixel when the pixel count is odd. */
	if(end - s >= 2)
	{
		uint16_t x;
		uint8_t *raw = (uint8_t *)&x;

		raw[0] = s[0];
		raw[1] = s[1];
		x = (x&0x7FFF) + (x&0x7FE0);
		d[0] = raw[0];
		d[1] = raw[1];
	}
}
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
185
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Convert 8-bit palettized pixels to 32-bit pixels by table lookup.
 *
 * Palette is assumed to contain bgr32 (4 bytes per entry). Each entry is
 * copied byte for byte, so neither dst nor palette has to be 4-byte aligned.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, 4 bytes are written per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    lookup table, 4 bytes per entry; must cover every index
 *                   that occurs in src
 */
void palette8torgb32(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	uint32_t i;
	for(i=0; i<num_pixels; i++)
	{
		const uint8_t *entry = palette + 4*(uint32_t)src[i];
		/* fetch the whole entry before storing it */
		const uint8_t e0 = entry[0];
		const uint8_t e1 = entry[1];
		const uint8_t e2 = entry[2];
		const uint8_t e3 = entry[3];

		dst[0] = e0;
		dst[1] = e1;
		dst[2] = e2;
		dst[3] = e3;
		dst += 4;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
195
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
/**
 * Convert 8-bit palettized pixels to packed 24-bit pixels by table lookup.
 *
 * Palette is assumed to contain bgr32: entries are 4 bytes apart and only
 * the first 3 bytes of an entry are copied.
 *
 * The pixel is copied byte by byte on purpose: a single 32-bit store per
 * pixel would write 1 byte too much and might cause alignment issues on
 * some architectures.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, 3 bytes are written per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    lookup table with a stride of 4 bytes per entry
 */
void palette8torgb24(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	uint8_t *out = dst;
	uint32_t pixel;
	int component;

	for(pixel = 0; pixel < num_pixels; pixel++, out += 3)
	{
		//FIXME slow?
		for(component = 0; component < 3; component++)
			out[component] = palette[ src[pixel]*4 + component ];
	}
}
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
216
2711
84dff4aac89e More standards compilance
nick
parents: 2704
diff changeset
/**
 * Convert 32-bit pixels to 16-bit pixels (5 bits / 6 bits / 5 bits).
 *
 * The source bytes of a pixel are taken in the order b, g, r, unused. The
 * result is a 16-bit word with b in bits 0..4, g in bits 5..10 and r in
 * bits 11..15, stored in the byte order of the host.
 *
 * @param src        input pixels, 4 bytes each (4*num_pixels bytes are read)
 * @param dst        output buffer, 2 bytes are written per pixel; no
 *                   particular alignment is required
 * @param num_pixels number of pixels to convert (not a byte count)
 */
void rgb32to16(const uint8_t *src, uint8_t *dst, uint32_t num_pixels)
{
	uint32_t i;
	/* One step per pixel: the source advances by 4 bytes and the
	   destination by 2, so every pixel is converted and the output is
	   contiguous. */
	for(i=0; i<num_pixels; i++)
	{
		const int b= src[0];
		const int g= src[1];
		const int r= src[2];
		const uint16_t px= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
		const uint8_t *raw= (const uint8_t *)&px;

		dst[0]= raw[0];
		dst[1]= raw[1];
		src+= 4;
		dst+= 2;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
229
2711
84dff4aac89e More standards compilance
nick
parents: 2704
diff changeset
/**
 * Convert 32-bit pixels to 15-bit pixels (5 bits per component).
 *
 * The source bytes of a pixel are taken in the order b, g, r, unused. The
 * result is a 16-bit word with b in bits 0..4, g in bits 5..9 and r in
 * bits 10..14 (bit 15 is 0), stored in the byte order of the host.
 *
 * @param src        input pixels, 4 bytes each (4*num_pixels bytes are read)
 * @param dst        output buffer, 2 bytes are written per pixel; no
 *                   particular alignment is required
 * @param num_pixels number of pixels to convert (not a byte count)
 */
void rgb32to15(const uint8_t *src, uint8_t *dst, uint32_t num_pixels)
{
	uint32_t i;
	/* One step per pixel: the source advances by 4 bytes and the
	   destination by 2, so every pixel is converted and the output is
	   contiguous. */
	for(i=0; i<num_pixels; i++)
	{
		const int b= src[0];
		const int g= src[1];
		const int r= src[2];
		/* green goes to bits 5..9: (g>>3)<<5 == (g&0xF8)<<2; a shift of 3
		   would run into the red field */
		const uint16_t px= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
		const uint8_t *raw= (const uint8_t *)&px;

		dst[0]= raw[0];
		dst[1]= raw[1];
		src+= 4;
		dst+= 2;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
242
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
243
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Convert 8-bit palettized pixels to 16-bit pixels by table lookup.
 *
 * Palette is assumed to contain bgr16 (2 bytes per entry), see rgb32to16 to
 * convert the palette. Each entry is copied byte for byte, so neither dst
 * nor palette has to be 2-byte aligned.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, 2 bytes are written per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    lookup table, 2 bytes per entry; must cover every index
 *                   that occurs in src
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	uint32_t i;
	for(i=0; i<num_pixels; i++)
	{
		const uint8_t *entry = palette + 2*(uint32_t)src[i];
		/* fetch the whole entry before storing it */
		const uint8_t e0 = entry[0];
		const uint8_t e1 = entry[1];

		dst[0] = e0;
		dst[1] = e1;
		dst += 2;
	}
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
253
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Convert 8-bit palettized pixels to 15-bit pixels by table lookup.
 *
 * Palette is assumed to contain bgr15 (2 bytes per entry), see rgb32to15 to
 * convert the palette. Each entry is copied byte for byte, so neither dst
 * nor palette has to be 2-byte aligned.
 *
 * @param src        one palette index per pixel
 * @param dst        output buffer, 2 bytes are written per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    lookup table, 2 bytes per entry; must cover every index
 *                   that occurs in src
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, uint32_t num_pixels, const uint8_t *palette)
{
	uint32_t i;
	for(i=0; i<num_pixels; i++)
	{
		const uint8_t *entry = palette + 2*(uint32_t)src[i];
		/* fetch the whole entry before storing it */
		const uint8_t e0 = entry[0];
		const uint8_t e1 = entry[1];

		dst[0] = e0;
		dst[1] = e1;
		dst += 2;
	}
}
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
/**
 * Interleaves separate Y, U and V samples into packed YUY2 (Y0 U Y1 V).
 *
 * ysrc       2 luma samples per pixel pair
 * usrc, vsrc 1 chroma sample each per pixel pair
 * dst        receives 4 bytes per pixel pair
 * num_pixels number of luma samples, an odd trailing pixel is ignored
 *
 * The MMX version converts 16 pixels per iteration; whatever is left over
 * (and everything if there are less than 16 pixels) is converted by the
 * C loop, so num_pixels does not have to be a multiple of 16.
 */
void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, uint32_t num_pixels)
{
	/* one U and one V sample are shared by every pair of pixels */
	uint32_t pairs= num_pixels>>1;
	uint32_t i;
#ifdef HAVE_MMX
	/* the asm loop handles 8 pairs per iteration and tests its counter at the
	   bottom, so it must only be entered with a non zero multiple of 8 */
	const uint32_t mmx_pairs= pairs & ~(uint32_t)7;
	if(mmx_pairs)
	{
		asm volatile(
			"xorl %%eax, %%eax		\n\t"
			"1:				\n\t"
			PREFETCH" 32(%1, %%eax, 2)	\n\t"
			PREFETCH" 32(%2, %%eax)		\n\t"
			PREFETCH" 32(%3, %%eax)		\n\t"
			"movq (%2, %%eax), %%mm0	\n\t" // U(0)
			"movq %%mm0, %%mm2		\n\t" // U(0)
			"movq (%3, %%eax), %%mm1	\n\t" // V(0)
			"punpcklbw %%mm1, %%mm0		\n\t" // UVUV UVUV(0)
			"punpckhbw %%mm1, %%mm2		\n\t" // UVUV UVUV(8)

			"movq (%1, %%eax,2), %%mm3	\n\t" // Y(0)
			"movq 8(%1, %%eax,2), %%mm5	\n\t" // Y(8)
			"movq %%mm3, %%mm4		\n\t" // Y(0)
			"movq %%mm5, %%mm6		\n\t" // Y(8)
			"punpcklbw %%mm0, %%mm3		\n\t" // YUYV YUYV(0)
			"punpckhbw %%mm0, %%mm4		\n\t" // YUYV YUYV(4)
			"punpcklbw %%mm2, %%mm5		\n\t" // YUYV YUYV(8)
			"punpckhbw %%mm2, %%mm6		\n\t" // YUYV YUYV(12)

			MOVNTQ" %%mm3, (%0, %%eax, 4)	\n\t"
			MOVNTQ" %%mm4, 8(%0, %%eax, 4)	\n\t"
			MOVNTQ" %%mm5, 16(%0, %%eax, 4)	\n\t"
			MOVNTQ" %%mm6, 24(%0, %%eax, 4)	\n\t"

			"addl $8, %%eax			\n\t"
			"cmpl %4, %%eax			\n\t"
			" jb 1b				\n\t"
			EMMS"				\n\t"
			SFENCE
			::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (mmx_pairs)
			: "memory", "%eax"
		);

		/* skip what the asm loop has converted, the C loop does the rest */
		dst  += 4*mmx_pairs;
		ysrc += 2*mmx_pairs;
		usrc += mmx_pairs;
		vsrc += mmx_pairs;
		pairs-= mmx_pairs;
	}
#endif
	for(i=0; i<pairs; i++)
	{
		dst[0]= ysrc[0];
		dst[1]= usrc[0];
		dst[2]= ysrc[1];
		dst[3]= vsrc[0];
		dst  += 4;
		ysrc += 2;
		usrc ++;
		vsrc ++;
	}
}
9b47bc409083 yv12 <-> yuy2 in C
michael
parents: 2698
diff changeset
317
2711
84dff4aac89e More standards compilance
nick
parents: 2704
diff changeset
/**
 * Splits packed YUY2 (Y0 U Y1 V) into separate Y, U and V samples.
 *
 * src        4 bytes per pixel pair
 * ydst       receives 2 luma samples per pixel pair
 * udst, vdst receive 1 chroma sample each per pixel pair
 * num_pixels number of luma samples, an odd trailing pixel is ignored
 *
 * The MMX version converts 16 pixels per iteration; whatever is left over
 * (and everything if there are less than 16 pixels) is converted by the
 * C loop, so num_pixels does not have to be a multiple of 16.
 */
void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, uint32_t num_pixels)
{
	/* every pixel pair carries one U and one V sample */
	uint32_t pairs= num_pixels>>1;
	uint32_t i;
#ifdef HAVE_MMX
	/* the asm loop handles 8 pairs per iteration and tests its counter at the
	   bottom, so it must only be entered with a non zero multiple of 8 */
	const uint32_t mmx_pairs= pairs & ~(uint32_t)7;
	if(mmx_pairs)
	{
		asm volatile(
			"xorl %%eax, %%eax		\n\t"
			"pcmpeqw %%mm7, %%mm7		\n\t"
			"psrlw $8, %%mm7		\n\t" // FF,00,FF,00...
			"1:				\n\t"
			PREFETCH" 64(%0, %%eax, 4)	\n\t"
			"movq (%0, %%eax, 4), %%mm0	\n\t" // YUYV YUYV(0)
			"movq 8(%0, %%eax, 4), %%mm1	\n\t" // YUYV YUYV(4)
			"movq %%mm0, %%mm2		\n\t" // YUYV YUYV(0)
			"movq %%mm1, %%mm3		\n\t" // YUYV YUYV(4)
			"psrlw $8, %%mm0		\n\t" // U0V0 U0V0(0)
			"psrlw $8, %%mm1		\n\t" // U0V0 U0V0(4)
			"pand %%mm7, %%mm2		\n\t" // Y0Y0 Y0Y0(0)
			"pand %%mm7, %%mm3		\n\t" // Y0Y0 Y0Y0(4)
			"packuswb %%mm1, %%mm0		\n\t" // UVUV UVUV(0)
			"packuswb %%mm3, %%mm2		\n\t" // YYYY YYYY(0)

			MOVNTQ" %%mm2, (%1, %%eax, 2)	\n\t"

			"movq 16(%0, %%eax, 4), %%mm1	\n\t" // YUYV YUYV(8)
			"movq 24(%0, %%eax, 4), %%mm2	\n\t" // YUYV YUYV(12)
			"movq %%mm1, %%mm3		\n\t" // YUYV YUYV(8)
			"movq %%mm2, %%mm4		\n\t" // YUYV YUYV(12)
			"psrlw $8, %%mm1		\n\t" // U0V0 U0V0(8)
			"psrlw $8, %%mm2		\n\t" // U0V0 U0V0(12)
			"pand %%mm7, %%mm3		\n\t" // Y0Y0 Y0Y0(8)
			"pand %%mm7, %%mm4		\n\t" // Y0Y0 Y0Y0(12)
			"packuswb %%mm2, %%mm1		\n\t" // UVUV UVUV(8)
			"packuswb %%mm4, %%mm3		\n\t" // YYYY YYYY(8)

			MOVNTQ" %%mm3, 8(%1, %%eax, 2)	\n\t"

			"movq %%mm0, %%mm2		\n\t" // UVUV UVUV(0)
			"movq %%mm1, %%mm3		\n\t" // UVUV UVUV(8)
			"psrlw $8, %%mm0		\n\t" // V0V0 V0V0(0)
			"psrlw $8, %%mm1		\n\t" // V0V0 V0V0(8)
			"pand %%mm7, %%mm2		\n\t" // U0U0 U0U0(0)
			"pand %%mm7, %%mm3		\n\t" // U0U0 U0U0(8)
			"packuswb %%mm1, %%mm0		\n\t" // VVVV VVVV(0)
			"packuswb %%mm3, %%mm2		\n\t" // UUUU UUUU(0)

			MOVNTQ" %%mm0, (%3, %%eax)	\n\t"
			MOVNTQ" %%mm2, (%2, %%eax)	\n\t"

			"addl $8, %%eax			\n\t"
			"cmpl %4, %%eax			\n\t"
			" jb 1b				\n\t"
			EMMS"				\n\t"
			SFENCE
			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (mmx_pairs)
			: "memory", "%eax"
		);

		/* skip what the asm loop has converted, the C loop does the rest */
		src  += 4*mmx_pairs;
		ydst += 2*mmx_pairs;
		udst += mmx_pairs;
		vdst += mmx_pairs;
		pairs-= mmx_pairs;
	}
#endif
	for(i=0; i<pairs; i++)
	{
		ydst[0]= src[0];
		udst[0]= src[1];
		ydst[1]= src[2];
		vdst[0]= src[3];
		src  += 4;
		ydst += 2;
		udst ++;
		vdst ++;
	}
}