annotate postproc/rgb2rgb.c @ 2723:22aba8af94af

fixed yv12toyuy2
author michael
date Mon, 05 Nov 2001 18:26:49 +0000
parents 4ba64e254042
children c08b7af26782
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
1 /*
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
2 *
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
3 * rgb2rgb.c, Software RGB to RGB converter
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
4 * Written by Nick Kurshev.
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
5 * palette stuff & yuv stuff by Michael
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
6 */
2504
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
7 #include <inttypes.h>
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
8 #include "../config.h"
13e1c5ab417a vo_vesa: rgb2rgb support
nick
parents:
diff changeset
9 #include "rgb2rgb.h"
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
10 #include "../mmx_defs.h"
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
11
2535
b44113f46c96 cant compile on non x86 bugfix
michael
parents: 2517
diff changeset
#ifdef HAVE_MMX
/*
 * 64-bit constants that the MMX code paths load into MMX registers.
 * All of them are aligned to 8 bytes.
 */

/* keeps the low three bytes of each 32-bit half (used by rgb24to32) */
static const uint64_t mask32 __attribute__((aligned(8))) = UINT64_C(0x00FFFFFF00FFFFFF);

/* bits 0..23 and bits 24..47 of a quadword (used by rgb32to24) */
static const uint64_t mask24l __attribute__((aligned(8))) = UINT64_C(0x0000000000FFFFFF);
static const uint64_t mask24h __attribute__((aligned(8))) = UINT64_C(0x0000FFFFFF000000);

/* per 16-bit word: 00000000 00011111  xxB */
static const uint64_t mask15b __attribute__((aligned(8))) = UINT64_C(0x001F001F001F001F);
/* per 16-bit word: 01111111 11100000  RGx */
static const uint64_t mask15rg __attribute__((aligned(8))) = UINT64_C(0x7FE07FE07FE07FE0);
/* per 16-bit word: everything above the low five bits (used by rgb15to16) */
static const uint64_t mask15s __attribute__((aligned(8))) = UINT64_C(0xFFE0FFE0FFE0FFE0);
#endif
2513
nick
parents: 2512
diff changeset
20
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Expand packed 3-byte pixels to 4-byte pixels.
 *
 * Every group of three source bytes is copied unchanged to dst and followed
 * by one zero byte.
 *
 * @param src      source buffer, 3 bytes per pixel
 * @param dst      destination buffer; must have room for 4 bytes per
 *                 complete source pixel
 * @param src_size size of src in bytes; an incomplete trailing pixel
 *                 (1 or 2 leftover bytes) is ignored
 */
void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
    /*
     * One iteration converts 4 pixels (12 source bytes -> 16 destination
     * bytes), but the last movd loads bytes 9..12, so 13 readable bytes are
     * needed.  The bound is taken from the bytes that are really left, not
     * from the alignment of the end address, so the loop can neither read
     * past src nor write past dst.
     */
    while(end - s >= 13)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movd %1, %%mm0\n\t"
            "movd 3%1, %%mm1\n\t"
            "movd 6%1, %%mm2\n\t"
            "movd 9%1, %%mm3\n\t"
            "punpckldq %%mm1, %%mm0\n\t"
            "punpckldq %%mm3, %%mm2\n\t"
            "pand %%mm7, %%mm0\n\t"   /* clear the 4th byte of each pixel */
            "pand %%mm7, %%mm2\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 16;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole pixels only, so a short tail is never read. */
    while(end - s >= 3)
    {
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 0;
    }
}
2505
2aaa11d22f91 vo_vesa: more rgb2rgb support
nick
parents: 2504
diff changeset
66
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Pack 4-byte pixels into 3-byte pixels.
 *
 * The first three bytes of every source pixel are copied to dst, the fourth
 * byte is dropped.
 *
 * @param src      source buffer, 4 bytes per pixel
 * @param dst      destination buffer; must have room for 3 bytes per
 *                 complete source pixel
 * @param src_size size of src in bytes; an incomplete trailing pixel
 *                 (1 to 3 leftover bytes) is ignored
 */
void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    __asm __volatile(
        "movq %0, %%mm7\n\t"
        "movq %1, %%mm6"
        ::"m"(mask24l),"m"(mask24h):"memory");
    /*
     * One iteration converts 4 pixels (16 source bytes -> 12 destination
     * bytes).  The two 8-byte stores at dest and dest+6 touch 14 bytes, i.e.
     * 2 bytes more than are consumed.  The loop therefore only runs while at
     * least one further complete pixel (16 + 4 bytes) is left: the following
     * iteration or the scalar loop below overwrites the 2 spilled bytes, so
     * nothing is written past the end of dst.  The bound is taken from the
     * bytes really left, not from the alignment of the end address.
     */
    while(end - s >= 20)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movq %1, %%mm0\n\t"
            "movq 8%1, %%mm1\n\t"
            "movq %%mm0, %%mm2\n\t"
            "movq %%mm1, %%mm3\n\t"
            "psrlq $8, %%mm2\n\t"
            "psrlq $8, %%mm3\n\t"
            "pand %%mm7, %%mm0\n\t"   /* first pixel  -> bits 0..23  */
            "pand %%mm7, %%mm1\n\t"
            "pand %%mm6, %%mm2\n\t"   /* second pixel -> bits 24..47 */
            "pand %%mm6, %%mm3\n\t"
            "por %%mm2, %%mm0\n\t"
            "por %%mm3, %%mm1\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm1, 6%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 12;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole pixels only, so a short tail is never read. */
    while(end - s >= 4)
    {
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        s++;
    }
}
2506
501752469c39 vo_vesa: more rgb2rgb support
nick
parents: 2505
diff changeset
119
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
120 /*
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
121 Original by Strepto/Astral
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
122 ported to gcc & bugfixed : A'rpi
2564
3d04a0991dce cosmetic
nick
parents: 2538
diff changeset
123 MMX2, 3DNOW optimization by Nick Kurshev
2698
22652c028692 faster 15to16 bit rgb (the mmx routine is limited by memory speed so there is no difference ): but the c routine is faster
michael
parents: 2697
diff changeset
124 32bit c version, and and&add trick by Michael Niedermayer
2538
71320898b333 Finish mmx2, 3dnow optimiz. 15to16 should be tested. Better fix of can't compile
nick
parents: 2535
diff changeset
125 */
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Convert 15-bit pixels (0rrrrrgggggbbbbb) to 16-bit pixels
 * (rrrrrggggggbbbbb).
 *
 * For every 16-bit word the low five bits are kept and bits 5..14 are moved
 * up by one position; bit 15 of the input is discarded.  The shift is done
 * with the "and & add" trick: x + (x & mask) doubles the masked part.
 *
 * @param src      source buffer, 2 bytes per pixel in native byte order
 * @param dst      destination buffer of at least src_size bytes
 * @param src_size size of src in bytes; an odd trailing byte is ignored
 *
 * NOTE(review): like the previous C version, the scalar path accesses the
 * buffers through 32-bit and 16-bit pointers, so src and dst are assumed to
 * be suitably aligned for that on targets that care -- confirm with callers.
 */
void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
    const uint8_t *s = src;
    const uint8_t *end = src + src_size;
    uint8_t *d = dst;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
    /*
     * 8 pixels (16 bytes) per iteration.  Only complete 16-byte chunks are
     * handled here; the remainder goes through the scalar code below, so
     * neither buffer is overrun when src_size is not a multiple of 16.
     */
    while(end - s >= 16)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movq %1, %%mm0\n\t"
            "movq 8%1, %%mm2\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm2, %%mm3\n\t"
            "pand %%mm4, %%mm0\n\t"
            "pand %%mm4, %%mm2\n\t"
            "paddw %%mm1, %%mm0\n\t"
            "paddw %%mm3, %%mm2\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            :"memory");
        s += 16;
        d += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /*
     * Two pixels at a time.  The arithmetic is unsigned: the upper half can
     * reach 0xFFDF, which would overflow a signed 32-bit int.  The lower
     * half never carries into the upper one (0x7FFF + 0x7FE0 = 0xFFDF).
     */
    while(end - s >= 4)
    {
        const uint32_t x = *(const uint32_t *)s;
        *(uint32_t *)d = (x & 0x7FFF7FFFU) + (x & 0x7FE07FE0U);
        s += 4;
        d += 4;
    }
    /* One leftover pixel when the pixel count is odd. */
    if(end - s >= 2)
    {
        const unsigned x = *(const uint16_t *)s;
        *(uint16_t *)d = (uint16_t)((x & 0x7FFFU) + (x & 0x7FE0U));
    }
}
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
185
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Convert 8-bit palettized pixels to 32-bit pixels.
 *
 * The palette is assumed to contain bgr32 entries, i.e. one 32-bit word per
 * palette index; each source byte selects the word copied to dst.
 *
 * @param src        one palette index per pixel
 * @param dst        destination buffer, 4 bytes per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    palette table, read as 32-bit words and indexed by the
 *                   source byte
 *
 * NOTE(review): palette and dst are accessed as 32-bit words, so they are
 * assumed to be suitably aligned -- confirm with callers.
 */
void palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    /* Fixed-width, const-preserving views of the byte buffers. */
    const uint32_t *pal = (const uint32_t *)palette;
    uint32_t *out = (uint32_t *)dst;
    unsigned i;
    for(i=0; i<num_pixels; i++)
        out[i] = pal[ src[i] ];
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
195
2697
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
/**
 * Convert 8-bit palettized pixels to 24-bit pixels.
 *
 * The palette is assumed to contain bgr32 entries (4 bytes per index); the
 * first three bytes of the selected entry are written to dst.
 *
 * @param src        one palette index per pixel
 * @param dst        destination buffer, 3 bytes per pixel
 * @param num_pixels number of pixels to convert
 * @param palette    palette table, 4 bytes per entry
 */
void palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
    const uint8_t *in = src;
    const uint8_t *const in_end = src + num_pixels;
    uint8_t *out = dst;

    /*
     * The entry is copied byte by byte on purpose: storing the whole 32-bit
     * entry at dst+3*i would write one byte too much and might cause
     * alignment issues on some architectures.
     * FIXME slow?
     */
    while(in != in_end)
    {
        const uint8_t *entry = &palette[ in[0]*4 ];
        in++;
        *out++ = entry[0];
        *out++ = entry[1];
        *out++ = entry[2];
    }
}
1eaf3f89e49f palette to bgr24
michael
parents: 2694
diff changeset
216
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Convert 4-byte pixels to 16-bit 5-6-5 pixels.
 *
 * Of each source pixel, byte 0 (b) supplies bits 0..4, byte 1 (g) bits 5..10
 * and byte 2 (r) bits 11..15 of the output word; byte 3 is ignored.
 *
 * @param src      source buffer, 4 bytes per pixel
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size size of src in bytes; src_size/4 pixels are converted
 */
void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    uint16_t *out = (uint16_t *)dst;
    uint16_t *const out_end = out + src_size/4;
    const uint8_t *px = src;

    for(; out != out_end; out++, px += 4)
    {
        const int blue  = px[0]>>3;
        const int green = (px[1]&0xFC)<<3;
        const int red   = (px[2]&0xF8)<<8;

        *out = blue | green | red;
    }
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
230
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Convert 4-byte pixels to 15-bit 5-5-5 pixels.
 *
 * Of each source pixel, byte 0 (b) supplies bits 0..4, byte 1 (g) bits 5..9
 * and byte 2 (r) bits 10..14 of the output word; byte 3 is ignored.
 *
 * @param src      source buffer, 4 bytes per pixel
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size size of src in bytes; src_size/4 pixels are converted
 */
void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    uint16_t *out = (uint16_t *)dst;
    uint16_t *const out_end = out + src_size/4;
    const uint8_t *px = src;

    for(; out != out_end; out++, px += 4)
    {
        const int blue  = px[0]>>3;
        const int green = (px[1]&0xF8)<<2;
        const int red   = (px[2]&0xF8)<<7;

        *out = blue | green | red;
    }
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
244
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Convert 3-byte pixels to 16-bit 5-6-5 pixels.
 *
 * Of each source pixel, byte 0 (b) supplies bits 0..4, byte 1 (g) bits 5..10
 * and byte 2 (r) bits 11..15 of the output word.
 *
 * @param src      source buffer, 3 bytes per pixel
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size size of src in bytes; src_size/3 pixels are converted
 */
void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    uint16_t *out = (uint16_t *)dst;
    uint16_t *const out_end = out + src_size/3;
    const uint8_t *px = src;

    for(; out != out_end; out++, px += 3)
    {
        const int blue  = px[0]>>3;
        const int green = (px[1]&0xFC)<<3;
        const int red   = (px[2]&0xF8)<<8;

        *out = blue | green | red;
    }
}
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
258
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Packs 24-bit pixels (three bytes each) into 15-bit 5:5:5 words.
 *
 * For every source triplet the top 5 bits of byte 0 go to bits 0-4, the top
 * 5 bits of byte 1 go to bits 5-9 and the top 5 bits of byte 2 go to
 * bits 10-14 of the output word; bit 15 is always zero.
 *
 * src_size is the source length in bytes; src_size/3 words are stored to dst
 * as native uint16_t values, trailing bytes that do not form a whole pixel
 * are ignored.
 */
void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const unsigned pixel_count = src_size / 3;
	uint16_t *out = (uint16_t *)dst;
	unsigned n;

	for (n = 0; n < pixel_count; n++) {
		const uint8_t *px = src + 3 * n;
		const int low  = px[0];
		const int mid  = px[1];
		const int high = px[2];

		out[n] = ((high & 0xF8) << 7) | ((mid & 0xF8) << 2) | (low >> 3);
	}
}
2694
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
272
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Expands 8-bit palette indices to 16-bit pixels by table lookup.
 *
 * The palette is read as an array of 16-bit entries indexed by the source
 * byte and is assumed to contain bgr16; see rgb32to16 to convert the palette.
 * num_pixels indices are read from src and num_pixels 16-bit words are
 * written to dst.
 */
void palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
	const uint16_t *lut = (const uint16_t *)palette;
	uint16_t *out = (uint16_t *)dst;
	const uint8_t *const end = src + num_pixels;

	while (src < end)
		*out++ = lut[*src++];
}
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
282
2924350d92ed bgr32to16, bgr32to15 (needed for palette stuff)
michael
parents: 2677
diff changeset
/**
 * Expands 8-bit palette indices to 15-bit pixels by table lookup.
 *
 * The palette is read as an array of 16-bit entries indexed by the source
 * byte and is assumed to contain bgr15; see rgb32to15 to convert the palette.
 * num_pixels indices are read from src and num_pixels 16-bit words are
 * written to dst.
 */
void palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette)
{
	const uint16_t *lut = (const uint16_t *)palette;
	uint16_t *out = (uint16_t *)dst;
	const uint8_t *const end = src + num_pixels;

	while (src < end)
		*out++ = lut[*src++];
}
2702
440312d953a8 yv12toyuy2 in MMX
michael
parents: 2701
diff changeset
/**
 * Interleaves separate Y, U and V planes into packed YUY2 (Y0 U Y1 V).
 *
 * For each of the height rows, width/2 four-byte groups are written to dst.
 * The chroma pointers advance only after every odd row, so each chroma row
 * is shared by two consecutive luma rows.
 *
 * lumStride, chromStride and dstStride are the byte distances between
 * consecutive rows of the luma planes, chroma planes and dst respectively.
 *
 * width must be a multiple of 16 for the MMX version (it converts 16 luma
 * samples per loop iteration). Calls with width < 2 or height <= 0 write
 * nothing.
 */
void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
	int width, int height, int lumStride, int chromStride, int dstStride)
{
	int y;
	const int chromWidth= width>>1;

	/* The MMX loop tests its condition at the bottom and compares unsigned,
	   so it would run (and overrun the buffers) for an empty or negative
	   width. The C loop is a no-op in that case, so bailing out is safe. */
	if(chromWidth <= 0 || height <= 0)
		return;

	for(y=0; y<height; y++)
	{
#ifdef HAVE_MMX
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
		asm volatile(
			"xorl %%eax, %%eax \n\t"
			"1: \n\t"
			PREFETCH" 32(%1, %%eax, 2) \n\t"
			PREFETCH" 32(%2, %%eax) \n\t"
			PREFETCH" 32(%3, %%eax) \n\t"
			"movq (%2, %%eax), %%mm0 \n\t" // U(0)
			"movq %%mm0, %%mm2 \n\t" // U(0)
			"movq (%3, %%eax), %%mm1 \n\t" // V(0)
			"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
			"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)

			"movq (%1, %%eax,2), %%mm3 \n\t" // Y(0)
			"movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8)
			"movq %%mm3, %%mm4 \n\t" // Y(0)
			"movq %%mm5, %%mm6 \n\t" // Y(8)
			"punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
			"punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
			"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
			"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)

			MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t"
			MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t"
			MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t"
			MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t"

			"addl $8, %%eax \n\t"
			"cmpl %4, %%eax \n\t"
			" jb 1b \n\t"
			::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth)
			/* "memory": the asm reads the source planes and stores to dst
			   through pointers the compiler cannot see being dereferenced */
			: "%eax", "memory"
		);
#else
		int i;
		for(i=0; i<chromWidth; i++)
		{
			dst[4*i+0] = ysrc[2*i+0];
			dst[4*i+1] = usrc[i];
			dst[4*i+2] = ysrc[2*i+1];
			dst[4*i+3] = vsrc[i];
		}
#endif
		/* move to the next chroma row only after the second luma row of a pair */
		if(y&1)
		{
			usrc += chromStride;
			vsrc += chromStride;
		}
		ysrc += lumStride;
		dst += dstStride;
	}
#ifdef HAVE_MMX
	/* leave MMX state and make the non-temporal stores globally visible */
	asm(	EMMS" \n\t"
		SFENCE" \n\t"
		:::"memory");
#endif
}
9b47bc409083 yv12 <-> yuy2 in C
michael
parents: 2698
diff changeset
361
2718
9c5e64493742 Well - old algorithms and new stuff rgb24to16(15)
nick
parents: 2711
diff changeset
/**
 * Splits packed YUY2 (Y0 U Y1 V) into separate Y, U and V buffers.
 *
 * num_pixels/2 four-byte groups are read from src; each yields two bytes
 * in ydst and one byte each in udst and vdst.
 *
 * The MMX version converts 16 pixels per loop iteration and tests its
 * condition at the bottom of the loop, so it always handles whole groups
 * of 16 pixels and at least one such group.
 */
void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, unsigned num_pixels)
{
#ifdef HAVE_MMX
	asm volatile(
		"xorl %%eax, %%eax \n\t"
		"pcmpeqw %%mm7, %%mm7 \n\t"
		"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
		"1: \n\t"
		PREFETCH" 64(%0, %%eax, 4) \n\t"
		"movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
		"movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4)
		"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
		"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
		"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
		"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
		"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
		"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
		"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
		"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)

		MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"

		"movq 16(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(8)
		"movq 24(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(12)
		"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
		"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
		"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
		"psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
		"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
		"pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
		"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
		"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)

		MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"

		"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
		"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
		"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
		"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
		"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
		"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
		"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
		"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)

		MOVNTQ" %%mm0, (%3, %%eax) \n\t"
		MOVNTQ" %%mm2, (%2, %%eax) \n\t"

		"addl $8, %%eax \n\t"
		"cmpl %4, %%eax \n\t"
		" jb 1b \n\t"
		EMMS" \n\t"
		SFENCE
		::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (num_pixels>>1)
		: "memory", "%eax"
	);
#else
	/* one iteration per Y0 U Y1 V group, i.e. per pair of pixels */
	unsigned groups_left = num_pixels >> 1;

	while (groups_left--) {
		*ydst++ = src[0];
		*udst++ = src[1];
		*ydst++ = src[2];
		*vdst++ = src[3];
		src += 4;
	}
#endif
}