annotate libswscale/rgb2rgb_template.c @ 22997:fd0fda0c6555

skip MMX code in rgb24tobgr24 if the size of the input is smaller than the size of the units the MMX code processes
author ivo
date Wed, 18 Apr 2007 09:27:59 +0000
parents 2a60af5e78a7
children beb4ac492c5e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1 /*
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3 * rgb2rgb.c, Software RGB to RGB convertor
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
4 * extended with: Software PAL8 to RGB convertor
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
5 * Software YUV to YUV convertor
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
6 * Software YUV to RGB convertor
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
7 * Written by Nick Kurshev.
19703
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
8 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
9 * lots of big-endian byte-order fixes by Alex Beregszaszi
19703
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
10 *
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 19703
diff changeset
11 * This file is part of FFmpeg.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 19703
diff changeset
12 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 19703
diff changeset
13 * FFmpeg is free software; you can redistribute it and/or modify
19703
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
14 * it under the terms of the GNU General Public License as published by
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
15 * the Free Software Foundation; either version 2 of the License, or
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
16 * (at your option) any later version.
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
17 *
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 19703
diff changeset
18 * FFmpeg is distributed in the hope that it will be useful,
19703
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
21 * GNU General Public License for more details.
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
22 *
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
23 * You should have received a copy of the GNU General Public License
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 19703
diff changeset
24 * along with FFmpeg; if not, write to the Free Software
19703
ad7f49a1ba95 Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents: 19396
diff changeset
25 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21029
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20724
diff changeset
26 *
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20724
diff changeset
27 * the C code (not assembly, mmx, ...) of this file can be used
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20724
diff changeset
28 * under the LGPL license too
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
29 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
30
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
31 #include <stddef.h>
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
32 #include <inttypes.h> /* for __WORDSIZE */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
33
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
34 #ifndef __WORDSIZE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
35 // #warning You have misconfigured system and probably will lose performance!
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
36 #define __WORDSIZE MP_WORDSIZE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
37 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
38
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
39 #undef PREFETCH
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
40 #undef MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
41 #undef EMMS
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
42 #undef SFENCE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
43 #undef MMREG_SIZE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
44 #undef PREFETCHW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
45 #undef PAVGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
46
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
47 #ifdef HAVE_SSE2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
48 #define MMREG_SIZE 16
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
49 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
50 #define MMREG_SIZE 8
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
51 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
52
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
53 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
54 #define PREFETCH "prefetch"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
55 #define PREFETCHW "prefetchw"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
56 #define PAVGB "pavgusb"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
57 #elif defined ( HAVE_MMX2 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
58 #define PREFETCH "prefetchnta"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
59 #define PREFETCHW "prefetcht0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
60 #define PAVGB "pavgb"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
61 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
62 #ifdef __APPLE__
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
63 #define PREFETCH "#"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
64 #define PREFETCHW "#"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
65 #else
20724
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20094
diff changeset
66 #define PREFETCH " # nop"
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20094
diff changeset
67 #define PREFETCHW " # nop"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
68 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
69 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
70
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
71 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
72 /* On K6, femms is faster than emms. On K7, femms is directly mapped to emms. */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
73 #define EMMS "femms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
74 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
75 #define EMMS "emms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
76 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
77
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
78 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
79 #define MOVNTQ "movntq"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
80 #define SFENCE "sfence"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
81 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
82 #define MOVNTQ "movq"
20724
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20094
diff changeset
83 #define SFENCE " # nop"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
84 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
85
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
86 static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
87 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
88 uint8_t *dest = dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
89 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
90 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
91 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
92 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
93 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
94 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
95 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
96 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
97 mm_end = end - 23;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
98 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
99 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
100 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
101 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
102 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
103 "movd %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
104 "punpckldq 3%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
105 "movd 6%1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
106 "punpckldq 9%1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
107 "movd 12%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
108 "punpckldq 15%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
109 "movd 18%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
110 "punpckldq 21%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
111 "pand %%mm7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
112 "pand %%mm7, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
113 "pand %%mm7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
114 "pand %%mm7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
115 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
116 MOVNTQ" %%mm1, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
117 MOVNTQ" %%mm2, 16%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
118 MOVNTQ" %%mm3, 24%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
119 :"=m"(*dest)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
120 :"m"(*s)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
121 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
122 dest += 32;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
123 s += 24;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
124 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
125 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
126 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
127 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
128 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
129 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
130 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
131 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
132 *dest++ = 0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
133 *dest++ = s[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
134 *dest++ = s[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
135 *dest++ = s[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
136 s+=3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
137 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
138 *dest++ = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
139 *dest++ = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
140 *dest++ = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
141 *dest++ = 0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
142 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
143 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
144 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
145
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
146 static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
147 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
148 uint8_t *dest = dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
149 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
150 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
151 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
152 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
153 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
154 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
155 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
156 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
157 mm_end = end - 31;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
158 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
159 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
160 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
161 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
162 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
163 "movq 8%1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
164 "movq 16%1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
165 "movq 24%1, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
166 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
167 "movq %%mm1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
168 "movq %%mm4, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
169 "movq %%mm5, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
170 "psrlq $8, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
171 "psrlq $8, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
172 "psrlq $8, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
173 "psrlq $8, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
174 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
175 "pand %2, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
176 "pand %2, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
177 "pand %2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
178 "pand %3, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
179 "pand %3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
180 "pand %3, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
181 "pand %3, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
182 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
183 "por %%mm3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
184 "por %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
185 "por %%mm7, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
186
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
187 "movq %%mm1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
188 "movq %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
189 "psllq $48, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
190 "psllq $32, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
191 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
192 "pand %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
193 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
194 "psrlq $16, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
195 "psrlq $32, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
196 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
197 "por %%mm3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
198 "pand %6, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
199 "por %%mm5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
200
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
201 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
202 MOVNTQ" %%mm1, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
203 MOVNTQ" %%mm4, 16%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
204 :"=m"(*dest)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
205 :"m"(*s),"m"(mask24l),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
206 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
207 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
208 dest += 24;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
209 s += 32;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
210 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
211 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
212 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
213 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
214 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
215 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
216 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
217 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
218 s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
219 dest[2] = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
220 dest[1] = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
221 dest[0] = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
222 dest += 3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
223 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
224 *dest++ = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
225 *dest++ = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
226 *dest++ = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
227 s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
228 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
229 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
230 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
231
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
232 /*
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
233 Original by Strepto/Astral
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
234 ported to gcc & bugfixed : A'rpi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
235 MMX2, 3DNOW optimization by Nick Kurshev
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
236 32-bit C version and the and&add trick by Michael Niedermayer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
237 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
238 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
239 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
240 register const uint8_t* s=src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
241 register uint8_t* d=dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
242 register const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
243 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
244 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
245 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
246 __asm __volatile(PREFETCH" %0"::"m"(*s));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
247 __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
248 mm_end = end - 15;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
249 while(s<mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
250 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
251 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
252 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
253 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
254 "movq 8%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
255 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
256 "movq %%mm2, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
257 "pand %%mm4, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
258 "pand %%mm4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
259 "paddw %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
260 "paddw %%mm3, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
261 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
262 MOVNTQ" %%mm2, 8%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
263 :"=m"(*d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
264 :"m"(*s)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
265 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
266 d+=16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
267 s+=16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
268 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
269 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
270 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
271 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
272 mm_end = end - 3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
273 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
274 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
275 register unsigned x= *((uint32_t *)s);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
276 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
277 d+=4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
278 s+=4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
279 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
280 if(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
281 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
282 register unsigned short x= *((uint16_t *)s);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
283 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
284 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
285 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
286
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Convert a buffer of 16-bit pixels to 15-bit pixels.
 *
 * For every 16-bit word the low 5 bits are kept as they are, while bits
 * 6..15 are moved down by one position into bits 5..14. Bit 5 of the input
 * is dropped and bit 15 of the output is always zero.
 *
 * @param src      input buffer
 * @param dst      output buffer, receives one 16-bit word per input word
 * @param src_size size of the input in bytes (expected to be even; a
 *                 trailing odd byte would be processed as a full word)
 */
static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size)
{
    register const uint8_t *s = src;
    register uint8_t *d = dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    /* mm7/mm6 hold the two masks applied after/without the shift;
       presumably the 64-bit equivalents of 0x7FE0 and 0x001F used below. */
    __asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm __volatile("movq %0, %%mm6"::"m"(mask15b));
    /* Run the MMX loop only while at least 16 input bytes remain. */
    mm_end = end - 15;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movq %1, %%mm0\n\t"
            "movq 8%1, %%mm2\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm2, %%mm3\n\t"
            "psrlq $1, %%mm0\n\t"
            "psrlq $1, %%mm2\n\t"
            "pand %%mm7, %%mm0\n\t"
            "pand %%mm7, %%mm2\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm3\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm3, %%mm2\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            /* The operands only describe one byte each, but 16 bytes are
               read and written: tell the compiler memory is touched. */
            :"memory"
        );
        d += 16;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: two pixels (one 32-bit word) per iteration. */
    mm_end = end - 3;
    while (s < mm_end)
    {
        register uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s += 4;
        d += 4;
    }
    /* At most one pixel is left over. */
    if (s < end)
    {
        register uint16_t x = *((const uint16_t *)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
        s += 2;
        d += 2;
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
342
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Convert a buffer of 32-bit pixels to 16-bit pixels.
 *
 * Seen as a native 32-bit word, each input pixel contributes:
 *   bits  3..7  -> output bits  0..4
 *   bits 10..15 -> output bits  5..10
 *   bits 19..23 -> output bits 11..15
 * The top byte (bits 24..31) is ignored.
 *
 * @param src      input buffer
 * @param dst      output buffer, receives one 16-bit word per input pixel
 * @param src_size size of the input in bytes (expected to be a multiple
 *                 of 4; the tail loop always reads whole 32-bit words)
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    /* The MMX loops consume 16 input bytes (4 pixels) per iteration. */
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it's slightly faster)
    __asm __volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        /* Jump straight to the loop test so that inputs shorter than
           one 16-byte unit skip the loop body entirely. */
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $5, %%mm0 \n\t"
        "pslld $11, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
        /* Source is read and destination written through the pointer
           registers only, so the accesses must be declared explicitly. */
        : "memory"
    );
#else
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7\n\t"
        "movq %1, %%mm6\n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movd %1, %%mm0\n\t"
            "movd 4%1, %%mm3\n\t"
            "punpckldq 8%1, %%mm0\n\t"
            "punpckldq 12%1, %%mm3\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm0, %%mm2\n\t"
            "movq %%mm3, %%mm4\n\t"
            "movq %%mm3, %%mm5\n\t"
            "psrlq $3, %%mm0\n\t"
            "psrlq $3, %%mm3\n\t"
            "pand %2, %%mm0\n\t"
            "pand %2, %%mm3\n\t"
            "psrlq $5, %%mm1\n\t"
            "psrlq $5, %%mm4\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm4\n\t"
            "psrlq $8, %%mm2\n\t"
            "psrlq $8, %%mm5\n\t"
            "pand %%mm7, %%mm2\n\t"
            "pand %%mm7, %%mm5\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm2, %%mm0\n\t"
            "por %%mm5, %%mm3\n\t"
            "psllq $16, %%mm3\n\t"
            "por %%mm3, %%mm0\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: also handles whatever the MMX loop left over. */
    while (s < end)
    {
        /* Unsigned on purpose: the pixel's top byte may have bit 31 set. */
        register uint32_t rgb = *(const uint32_t *)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
440
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Convert a buffer of 32-bit pixels to 16-bit pixels with the outer two
 * colour fields swapped relative to rgb32to16.
 *
 * Seen as a native 32-bit word, each input pixel contributes:
 *   bits  3..7  -> output bits 11..15
 *   bits 10..15 -> output bits  5..10
 *   bits 19..23 -> output bits  0..4
 * The top byte (bits 24..31) is ignored.
 *
 * @param src      input buffer
 * @param dst      output buffer, receives one 16-bit word per input pixel
 * @param src_size size of the input in bytes (expected to be a multiple
 *                 of 4; the tail loop always reads whole 32-bit words)
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7\n\t"
        "movq %1, %%mm6\n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* Each iteration consumes 16 input bytes (4 pixels) and stores
       4 output pixels, so stop once fewer than 16 bytes remain. */
    mm_end = end - 15;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movd %1, %%mm0\n\t"
            "movd 4%1, %%mm3\n\t"
            "punpckldq 8%1, %%mm0\n\t"
            "punpckldq 12%1, %%mm3\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm0, %%mm2\n\t"
            "movq %%mm3, %%mm4\n\t"
            "movq %%mm3, %%mm5\n\t"
            "psllq $8, %%mm0\n\t"
            "psllq $8, %%mm3\n\t"
            "pand %%mm7, %%mm0\n\t"
            "pand %%mm7, %%mm3\n\t"
            "psrlq $5, %%mm1\n\t"
            "psrlq $5, %%mm4\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm4\n\t"
            "psrlq $19, %%mm2\n\t"
            "psrlq $19, %%mm5\n\t"
            "pand %2, %%mm2\n\t"
            "pand %2, %%mm5\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm2, %%mm0\n\t"
            "por %%mm5, %%mm3\n\t"
            "psllq $16, %%mm3\n\t"
            "por %%mm3, %%mm0\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: also handles whatever the MMX loop left over. */
    while (s < end)
    {
        /* Unsigned on purpose: the pixel's top byte may have bit 31 set. */
        register uint32_t rgb = *(const uint32_t *)s; s += 4;
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
501
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Convert a buffer of 32-bit pixels to 15-bit pixels.
 *
 * Seen as a native 32-bit word, each input pixel contributes:
 *   bits  3..7  -> output bits  0..4
 *   bits 11..15 -> output bits  5..9
 *   bits 19..23 -> output bits 10..14
 * The top byte (bits 24..31) is ignored and output bit 15 is always zero.
 *
 * @param src      input buffer
 * @param dst      output buffer, receives one 16-bit word per input pixel
 * @param src_size size of the input in bytes (expected to be a multiple
 *                 of 4; the tail loop always reads whole 32-bit words)
 */
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    /* The MMX loops consume 16 input bytes (4 pixels) per iteration. */
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it's slightly faster)
    __asm __volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        /* Jump straight to the loop test so that inputs shorter than
           one 16-byte unit skip the loop body entirely. */
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $6, %%mm0 \n\t"
        "pslld $10, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
        /* Source is read and destination written through the pointer
           registers only, so the accesses must be declared explicitly. */
        : "memory"
    );
#else
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7\n\t"
        "movq %1, %%mm6\n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            "movd %1, %%mm0\n\t"
            "movd 4%1, %%mm3\n\t"
            "punpckldq 8%1, %%mm0\n\t"
            "punpckldq 12%1, %%mm3\n\t"
            "movq %%mm0, %%mm1\n\t"
            "movq %%mm0, %%mm2\n\t"
            "movq %%mm3, %%mm4\n\t"
            "movq %%mm3, %%mm5\n\t"
            "psrlq $3, %%mm0\n\t"
            "psrlq $3, %%mm3\n\t"
            "pand %2, %%mm0\n\t"
            "pand %2, %%mm3\n\t"
            "psrlq $6, %%mm1\n\t"
            "psrlq $6, %%mm4\n\t"
            "pand %%mm6, %%mm1\n\t"
            "pand %%mm6, %%mm4\n\t"
            "psrlq $9, %%mm2\n\t"
            "psrlq $9, %%mm5\n\t"
            "pand %%mm7, %%mm2\n\t"
            "pand %%mm7, %%mm5\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm2, %%mm0\n\t"
            "por %%mm5, %%mm3\n\t"
            "psllq $16, %%mm3\n\t"
            "por %%mm3, %%mm0\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: also handles whatever the MMX loop left over. */
    while (s < end)
    {
        /* Unsigned on purpose: the pixel's top byte may have bit 31 set. */
        register uint32_t rgb = *(const uint32_t *)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
599
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
600 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
601 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
602 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
603 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
604 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
605 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
606 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
607 uint16_t *d = (uint16_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
608 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
609 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
610 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
611 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
612 "movq %0, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
613 "movq %1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
614 ::"m"(red_15mask),"m"(green_15mask));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
615 mm_end = end - 15;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
616 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
617 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
618 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
619 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
620 "movd %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
621 "movd 4%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
622 "punpckldq 8%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
623 "punpckldq 12%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
624 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
625 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
626 "movq %%mm3, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
627 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
628 "psllq $7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
629 "psllq $7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
630 "pand %%mm7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
631 "pand %%mm7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
632 "psrlq $6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
633 "psrlq $6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
634 "pand %%mm6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
635 "pand %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
636 "psrlq $19, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
637 "psrlq $19, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
638 "pand %2, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
639 "pand %2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
640 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
641 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
642 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
643 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
644 "psllq $16, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
645 "por %%mm3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
646 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
647 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
648 d += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
649 s += 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
650 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
651 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
652 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
653 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
654 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
655 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
656 register int rgb = *(uint32_t*)s; s += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
657 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
658 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
659 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
660
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
661 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
662 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
663 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
664 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
665 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
666 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
667 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
668 uint16_t *d = (uint16_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
669 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
670 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
671 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
672 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
673 "movq %0, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
674 "movq %1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
675 ::"m"(red_16mask),"m"(green_16mask));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
676 mm_end = end - 11;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
677 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
678 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
679 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
680 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
681 "movd %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
682 "movd 3%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
683 "punpckldq 6%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
684 "punpckldq 9%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
685 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
686 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
687 "movq %%mm3, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
688 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
689 "psrlq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
690 "psrlq $3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
691 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
692 "pand %2, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
693 "psrlq $5, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
694 "psrlq $5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
695 "pand %%mm6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
696 "pand %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
697 "psrlq $8, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
698 "psrlq $8, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
699 "pand %%mm7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
700 "pand %%mm7, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
701 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
702 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
703 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
704 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
705 "psllq $16, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
706 "por %%mm3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
707 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
708 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
709 d += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
710 s += 12;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
711 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
712 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
713 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
714 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
715 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
716 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
717 const int b= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
718 const int g= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
719 const int r= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
720 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
721 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
722 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
723
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
724 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
725 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
726 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
727 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
728 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
729 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
730 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
731 uint16_t *d = (uint16_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
732 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
733 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
734 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
735 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
736 "movq %0, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
737 "movq %1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
738 ::"m"(red_16mask),"m"(green_16mask));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
739 mm_end = end - 15;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
740 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
741 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
742 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
743 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
744 "movd %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
745 "movd 3%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
746 "punpckldq 6%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
747 "punpckldq 9%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
748 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
749 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
750 "movq %%mm3, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
751 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
752 "psllq $8, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
753 "psllq $8, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
754 "pand %%mm7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
755 "pand %%mm7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
756 "psrlq $5, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
757 "psrlq $5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
758 "pand %%mm6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
759 "pand %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
760 "psrlq $19, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
761 "psrlq $19, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
762 "pand %2, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
763 "pand %2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
764 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
765 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
766 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
767 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
768 "psllq $16, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
769 "por %%mm3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
770 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
771 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
772 d += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
773 s += 12;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
774 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
775 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
776 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
777 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
778 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
779 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
780 const int r= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
781 const int g= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
782 const int b= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
783 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
784 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
785 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
786
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
787 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
788 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
789 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
790 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
791 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
792 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
793 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
794 uint16_t *d = (uint16_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
795 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
796 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
797 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
798 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
799 "movq %0, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
800 "movq %1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
801 ::"m"(red_15mask),"m"(green_15mask));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
802 mm_end = end - 11;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
803 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
804 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
805 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
806 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
807 "movd %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
808 "movd 3%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
809 "punpckldq 6%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
810 "punpckldq 9%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
811 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
812 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
813 "movq %%mm3, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
814 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
815 "psrlq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
816 "psrlq $3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
817 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
818 "pand %2, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
819 "psrlq $6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
820 "psrlq $6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
821 "pand %%mm6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
822 "pand %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
823 "psrlq $9, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
824 "psrlq $9, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
825 "pand %%mm7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
826 "pand %%mm7, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
827 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
828 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
829 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
830 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
831 "psllq $16, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
832 "por %%mm3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
833 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
834 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
835 d += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
836 s += 12;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
837 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
838 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
839 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
840 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
841 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
842 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
843 const int b= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
844 const int g= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
845 const int r= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
846 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
847 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
848 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
849
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
850 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
851 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
852 const uint8_t *s = src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
853 const uint8_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
854 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
855 const uint8_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
856 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
857 uint16_t *d = (uint16_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
858 end = s + src_size;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
859 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
860 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
861 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
862 "movq %0, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
863 "movq %1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
864 ::"m"(red_15mask),"m"(green_15mask));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
865 mm_end = end - 15;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
866 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
867 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
868 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
869 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
870 "movd %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
871 "movd 3%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
872 "punpckldq 6%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
873 "punpckldq 9%1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
874 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
875 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
876 "movq %%mm3, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
877 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
878 "psllq $7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
879 "psllq $7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
880 "pand %%mm7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
881 "pand %%mm7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
882 "psrlq $6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
883 "psrlq $6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
884 "pand %%mm6, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
885 "pand %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
886 "psrlq $19, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
887 "psrlq $19, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
888 "pand %2, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
889 "pand %2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
890 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
891 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
892 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
893 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
894 "psllq $16, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
895 "por %%mm3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
896 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
897 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
898 d += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
899 s += 12;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
900 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
901 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
902 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
903 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
904 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
905 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
906 const int r= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
907 const int g= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
908 const int b= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
909 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
910 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
911 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
912
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
  This code uses a less accurate approximation: the input value is simply
  left-shifted and the low-order bits are filled with zeros. This method
  improves PNG compression, but it cannot reproduce white exactly, since it
  does not generate an all-ones maximum value; the net effect is to darken
  the image slightly.

  The better method would be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      Leftmost Bits Repeated to Fill Open Bits
       |
   Original Bits
*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
936 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
937 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
938 const uint16_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
939 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
940 const uint16_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
941 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
942 uint8_t *d = (uint8_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
943 const uint16_t *s = (uint16_t *)src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
944 end = s + src_size/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
945 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
946 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
947 mm_end = end - 7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
948 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
949 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
950 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
951 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
952 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
953 "movq %1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
954 "movq %1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
955 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
956 "pand %3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
957 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
958 "psllq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
959 "psrlq $2, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
960 "psrlq $7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
961 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
962 "movq %%mm1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
963 "movq %%mm2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
964 "punpcklwd %5, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
965 "punpcklwd %5, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
966 "punpcklwd %5, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
967 "punpckhwd %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
968 "punpckhwd %5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
969 "punpckhwd %5, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
970 "psllq $8, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
971 "psllq $16, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
972 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
973 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
974 "psllq $8, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
975 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
976 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
977 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
978
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
979 "movq %%mm0, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
980 "movq %%mm3, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
981
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
982 "movq 8%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
983 "movq 8%1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
984 "movq 8%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
985 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
986 "pand %3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
987 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
988 "psllq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
989 "psrlq $2, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
990 "psrlq $7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
991 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
992 "movq %%mm1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
993 "movq %%mm2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
994 "punpcklwd %5, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
995 "punpcklwd %5, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
996 "punpcklwd %5, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
997 "punpckhwd %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
998 "punpckhwd %5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
999 "punpckhwd %5, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1000 "psllq $8, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1001 "psllq $16, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1002 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1003 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1004 "psllq $8, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1005 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1006 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1007 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1008
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1009 :"=m"(*d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1010 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1011 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1012 /* Borrowed 32 to 24 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1013 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1014 "movq %%mm0, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1015 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1016 "movq %%mm6, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1017 "movq %%mm7, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1018
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1019 "movq %%mm4, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1020 "movq %%mm5, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1021 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1022 "movq %%mm1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1023
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1024 "psrlq $8, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1025 "psrlq $8, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1026 "psrlq $8, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1027 "psrlq $8, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1028 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1029 "pand %2, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1030 "pand %2, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1031 "pand %2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1032 "pand %3, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1033 "pand %3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1034 "pand %3, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1035 "pand %3, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1036 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1037 "por %%mm3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1038 "por %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1039 "por %%mm7, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1040
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1041 "movq %%mm1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1042 "movq %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1043 "psllq $48, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1044 "psllq $32, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1045 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1046 "pand %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1047 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1048 "psrlq $16, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1049 "psrlq $32, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1050 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1051 "por %%mm3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1052 "pand %6, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1053 "por %%mm5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1054
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1055 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1056 MOVNTQ" %%mm1, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1057 MOVNTQ" %%mm4, 16%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1058
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1059 :"=m"(*d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1060 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1061 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1062 d += 24;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1063 s += 8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1064 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1065 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1066 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1067 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1068 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1069 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1070 register uint16_t bgr;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1071 bgr = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1072 *d++ = (bgr&0x1F)<<3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1073 *d++ = (bgr&0x3E0)>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1074 *d++ = (bgr&0x7C00)>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1075 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1076 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1077
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1078 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1079 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1080 const uint16_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1081 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1082 const uint16_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1083 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1084 uint8_t *d = (uint8_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1085 const uint16_t *s = (const uint16_t *)src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1086 end = s + src_size/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1087 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1088 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1089 mm_end = end - 7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1090 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1091 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1092 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1093 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1094 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1095 "movq %1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1096 "movq %1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1097 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1098 "pand %3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1099 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1100 "psllq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1101 "psrlq $3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1102 "psrlq $8, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1103 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1104 "movq %%mm1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1105 "movq %%mm2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1106 "punpcklwd %5, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1107 "punpcklwd %5, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1108 "punpcklwd %5, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1109 "punpckhwd %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1110 "punpckhwd %5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1111 "punpckhwd %5, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1112 "psllq $8, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1113 "psllq $16, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1114 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1115 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1116 "psllq $8, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1117 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1118 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1119 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1120
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1121 "movq %%mm0, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1122 "movq %%mm3, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1123
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1124 "movq 8%1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1125 "movq 8%1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1126 "movq 8%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1127 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1128 "pand %3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1129 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1130 "psllq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1131 "psrlq $3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1132 "psrlq $8, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1133 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1134 "movq %%mm1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1135 "movq %%mm2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1136 "punpcklwd %5, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1137 "punpcklwd %5, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1138 "punpcklwd %5, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1139 "punpckhwd %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1140 "punpckhwd %5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1141 "punpckhwd %5, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1142 "psllq $8, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1143 "psllq $16, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1144 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1145 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1146 "psllq $8, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1147 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1148 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1149 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1150 :"=m"(*d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1151 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1152 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1153 /* Borrowed 32 to 24 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1154 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1155 "movq %%mm0, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1156 "movq %%mm3, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1157 "movq %%mm6, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1158 "movq %%mm7, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1159
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1160 "movq %%mm4, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1161 "movq %%mm5, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1162 "movq %%mm0, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1163 "movq %%mm1, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1164
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1165 "psrlq $8, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1166 "psrlq $8, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1167 "psrlq $8, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1168 "psrlq $8, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1169 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1170 "pand %2, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1171 "pand %2, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1172 "pand %2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1173 "pand %3, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1174 "pand %3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1175 "pand %3, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1176 "pand %3, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1177 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1178 "por %%mm3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1179 "por %%mm6, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1180 "por %%mm7, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1181
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1182 "movq %%mm1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1183 "movq %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1184 "psllq $48, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1185 "psllq $32, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1186 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1187 "pand %5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1188 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1189 "psrlq $16, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1190 "psrlq $32, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1191 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1192 "por %%mm3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1193 "pand %6, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1194 "por %%mm5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1195
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1196 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1197 MOVNTQ" %%mm1, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1198 MOVNTQ" %%mm4, 16%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1199
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1200 :"=m"(*d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1201 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1202 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1203 d += 24;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1204 s += 8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1205 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1206 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1207 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1208 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1209 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1210 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1211 register uint16_t bgr;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1212 bgr = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1213 *d++ = (bgr&0x1F)<<3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1214 *d++ = (bgr&0x7E0)>>3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1215 *d++ = (bgr&0xF800)>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1216 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1217 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1218
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1219 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1220 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1221 const uint16_t *end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1222 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1223 const uint16_t *mm_end;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1224 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1225 uint8_t *d = (uint8_t *)dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1226 const uint16_t *s = (const uint16_t *)src;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1227 end = s + src_size/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1228 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1229 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1230 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1231 mm_end = end - 3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1232 while(s < mm_end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1233 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1234 __asm __volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1235 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1236 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1237 "movq %1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1238 "movq %1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1239 "pand %2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1240 "pand %3, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1241 "pand %4, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1242 "psllq $3, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1243 "psrlq $2, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1244 "psrlq $7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1245 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1246 "movq %%mm1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1247 "movq %%mm2, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1248 "punpcklwd %%mm7, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1249 "punpcklwd %%mm7, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1250 "punpcklwd %%mm7, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1251 "punpckhwd %%mm7, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1252 "punpckhwd %%mm7, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1253 "punpckhwd %%mm7, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1254 "psllq $8, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1255 "psllq $16, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1256 "por %%mm1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1257 "por %%mm2, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1258 "psllq $8, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1259 "psllq $16, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1260 "por %%mm4, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1261 "por %%mm5, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1262 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1263 MOVNTQ" %%mm3, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1264 :"=m"(*d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1265 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1266 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1267 d += 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1268 s += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1269 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1270 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1271 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1272 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1273 while(s < end)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1274 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1275 #if 0 //slightly slower on athlon
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1276 int bgr= *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1277 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1278 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1279 register uint16_t bgr;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1280 bgr = *s++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1281 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1282 *d++ = 0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1283 *d++ = (bgr&0x7C00)>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1284 *d++ = (bgr&0x3E0)>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1285 *d++ = (bgr&0x1F)<<3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1286 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1287 *d++ = (bgr&0x1F)<<3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1288 *d++ = (bgr&0x3E0)>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1289 *d++ = (bgr&0x7C00)>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1290 *d++ = 0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1291 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1292
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1293 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1294 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1295 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1296
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Expand packed 16-bit 5-6-5 pixels to 32 bits per pixel.
 *
 * Every input pixel is read as a native uint16_t.  Its three colour fields
 * (bits 0-4, bits 5-10, bits 11-15) are each moved to the top of a byte and
 * written together with one zero filler byte:
 *   - without WORDS_BIGENDIAN: low field, middle field, high field, 0
 *   - with WORDS_BIGENDIAN:    0, high field, middle field, low field
 *
 * @param src      input pixels, 2 bytes each
 * @param dst      output buffer, receives 4 bytes per input pixel
 * @param src_size size of src in bytes; an odd trailing byte is ignored
 */
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#ifdef HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src; /* keep the const qualifier */

    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* mm7 = 0, used below to zero-extend 16-bit lanes to 32 bits */
    __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory");
    /*
     * One MMX iteration consumes 4 pixels.  The loop runs while more than
     * 3 pixels remain; the bound is computed without ever forming a pointer
     * in front of the buffer (end - 3 would do so for tiny inputs).
     */
    mm_end = (end - s > 3) ? end - 3 : s;
    while(s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            /* three copies of the same 4 pixels, one per colour field */
            "movq %1, %%mm0\n\t"
            "movq %1, %%mm1\n\t"
            "movq %1, %%mm2\n\t"
            /* isolate each field (masks are defined outside this block) */
            "pand %2, %%mm0\n\t"
            "pand %3, %%mm1\n\t"
            "pand %4, %%mm2\n\t"
            /* same shifts as the scalar code: <<3, >>3, >>8 */
            "psllq $3, %%mm0\n\t"
            "psrlq $3, %%mm1\n\t"
            "psrlq $8, %%mm2\n\t"
            "movq %%mm0, %%mm3\n\t"
            "movq %%mm1, %%mm4\n\t"
            "movq %%mm2, %%mm5\n\t"
            /* widen: low two pixels in mm0-2, high two pixels in mm3-5 */
            "punpcklwd %%mm7, %%mm0\n\t"
            "punpcklwd %%mm7, %%mm1\n\t"
            "punpcklwd %%mm7, %%mm2\n\t"
            "punpckhwd %%mm7, %%mm3\n\t"
            "punpckhwd %%mm7, %%mm4\n\t"
            "punpckhwd %%mm7, %%mm5\n\t"
            /* move the fields to bytes 1 and 2 and merge with byte 0 */
            "psllq $8, %%mm1\n\t"
            "psllq $16, %%mm2\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm2, %%mm0\n\t"
            "psllq $8, %%mm4\n\t"
            "psllq $16, %%mm5\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm5, %%mm3\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm3, 8%0\n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* scalar path: everything without MMX, otherwise the last 0-3 pixels */
    while(s < end)
    {
        register uint16_t bgr;
        bgr = *s++;
#ifdef WORDS_BIGENDIAN
        *d++ = 0;
        *d++ = (bgr&0xF800)>>8;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0x1F)<<3;
#else
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0xF800)>>8;
        *d++ = 0;
#endif
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1368
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Swap the colour fields in bits 0-7 and bits 16-23 of every 32-bit pixel.
 *
 * Bits 8-15 and bits 24-31 of each pixel are copied through unchanged, so the
 * scalar tail agrees with what the MMX loop appears to produce.  Only whole
 * pixels are converted: a trailing group of fewer than 4 bytes is left alone.
 *
 * @param src      input pixels, 4 bytes each
 * @param dst      output buffer, same size as src
 * @param src_size size of src in bytes
 */
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    /*
     * idx runs from (15 - src_size) upwards and both pointers are biased by
     * the same amount, so s[idx]/d[idx] start at src[0]/dst[0].  idx < 0
     * means at least 16 bytes are left, which is one MMX iteration.
     */
    long idx = 15 - src_size;
    const uint8_t *s = src - idx;
    uint8_t *d = dst - idx;
#ifdef HAVE_MMX
    __asm __volatile(
        /* skip the whole MMX part when fewer than 16 bytes are available */
        " test %0, %0 \n"
        " jns 2f \n"
        " "PREFETCH" (%1, %0) \n"
        /*
         * mm6 = mask32b ^ mask32r, mm7 = mm6 ^ mmx_one.
         * NOTE(review): the constants are defined outside this block;
         * presumably mm6 selects the two bytes that get swapped and mm7 the
         * two bytes that stay in place -- confirm against their definitions.
         */
        " movq %3, %%mm7 \n"
        " pxor %4, %%mm7 \n"
        " movq %%mm7, %%mm6 \n"
        " pxor %5, %%mm7 \n"
        ASMALIGN(4)
        "1: \n"
        " "PREFETCH" 32(%1, %0) \n"
        " movq (%1, %0), %%mm0 \n"
        " movq 8(%1, %0), %%mm1 \n"
# ifdef HAVE_MMX2
        /* pshufw $177 exchanges the two 16-bit halves of every pixel */
        " pshufw $177, %%mm0, %%mm3 \n"
        " pshufw $177, %%mm1, %%mm5 \n"
        " pand %%mm7, %%mm0 \n"
        " pand %%mm6, %%mm3 \n"
        " pand %%mm7, %%mm1 \n"
        " pand %%mm6, %%mm5 \n"
        " por %%mm3, %%mm0 \n"
        " por %%mm5, %%mm1 \n"
# else
        /* plain MMX: shift the masked bytes by 16 bits in both directions */
        " movq %%mm0, %%mm2 \n"
        " movq %%mm1, %%mm4 \n"
        " pand %%mm7, %%mm0 \n"
        " pand %%mm6, %%mm2 \n"
        " pand %%mm7, %%mm1 \n"
        " pand %%mm6, %%mm4 \n"
        " movq %%mm2, %%mm3 \n"
        " movq %%mm4, %%mm5 \n"
        " pslld $16, %%mm2 \n"
        " psrld $16, %%mm3 \n"
        " pslld $16, %%mm4 \n"
        " psrld $16, %%mm5 \n"
        " por %%mm2, %%mm0 \n"
        " por %%mm4, %%mm1 \n"
        " por %%mm3, %%mm0 \n"
        " por %%mm5, %%mm1 \n"
# endif
        " "MOVNTQ" %%mm0, (%2, %0) \n"
        " "MOVNTQ" %%mm1, 8(%2, %0) \n"
        /* the add sets the sign flag, so no separate compare is needed */
        " add $16, %0 \n"
        " js 1b \n"
        " "SFENCE" \n"
        " "EMMS" \n"
        "2: \n"
        : "+&r"(idx)
        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
        : "memory");
#endif
    /*
     * Scalar tail: (15 - idx) bytes remain, so idx < 12 means a complete
     * 4-byte pixel is still available.  Unsigned arithmetic keeps the 16-bit
     * left shift well defined.
     */
    for (; idx < 12; idx += 4) {
        uint32_t v = *(const uint32_t *)&s[idx];
        const uint32_t keep = v & 0xff00ff00; /* bits 8-15 and 24-31 stay */

        v &= 0x00ff00ff;
        *(uint32_t *)&d[idx] = (v >> 16) | keep | (v << 16);
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1431
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Reverse the byte order of every 3-byte pixel (bytes 0 and 2 are exchanged,
 * byte 1 is copied).
 *
 * Only whole pixels are converted; if src_size is not a multiple of 3 the
 * trailing 1-2 bytes are neither read nor written.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output buffer, same size as src
 * @param src_size size of src in bytes
 */
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    long i;
#ifdef HAVE_MMX
    /*
     * mmx_size counts upwards from (23 - src_size) in steps of 24; it is
     * negative as long as at least 24 bytes (8 pixels) remain.  The two base
     * pointers are biased by the same amount so that (base, mmx_size)
     * addresses the current position.
     */
    long mmx_size= 23 - src_size;
    asm volatile (
        /* input shorter than one 24-byte unit: skip the MMX loop */
        "test %%"REG_a", %%"REG_a" \n\t"
        "jns 2f \n\t"
        "movq "MANGLE(mask24r)", %%mm5 \n\t"
        "movq "MANGLE(mask24g)", %%mm6 \n\t"
        "movq "MANGLE(mask24b)", %%mm7 \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1, %%"REG_a") \n\t"
        "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
        "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG
        "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B
        "psllq $16, %%mm0 \n\t" // 00 BGR BGR
        "pand %%mm5, %%mm0 \n\t"
        "pand %%mm6, %%mm1 \n\t"
        "pand %%mm7, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
        MOVNTQ" %%mm1, (%2, %%"REG_a")\n\t" // RGB RGB RG
        "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B
        "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm6, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B
        MOVNTQ" %%mm1, 8(%2, %%"REG_a")\n\t" // B RGB RGB R
        "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR
        /*
         * NOTE(review): this 8-byte load covers offsets 18..25 of a 24-byte
         * unit, so the final iteration can read up to 2 bytes past the end
         * of src -- confirm callers provide that padding.
         */
        "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm5, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        MOVNTQ" %%mm1, 16(%2, %%"REG_a")\n\t"
        "add $24, %%"REG_a" \n\t"
        " js 1b \n\t"
        "2: \n\t"
        : "+a" (mmx_size)
        : "r" (src-mmx_size), "r"(dst-mmx_size)
    );

    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");

    if(mmx_size==23) return; // finished: nothing left (size was a multiple of 24)

    /* point src/dst at the (23 - mmx_size) bytes the MMX loop did not handle */
    src+= src_size;
    dst+= src_size;
    src_size= 23-mmx_size;
    src-= src_size;
    dst-= src_size;
#endif
    /*
     * Scalar path.  The loop needs all three bytes of a pixel, so it stops
     * before a truncated trailing pixel instead of running off the buffers.
     * src[i + 2] is fetched before any store so that src == dst still works.
     */
    for(i=0; i + 2 < src_size; i+=3)
    {
        register uint8_t x;
        x = src[i + 2];
        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = x;
    }
}
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1501
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1502 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1503 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1504 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1505 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1506 long y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1507 const long chromWidth= width>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1508 for(y=0; y<height; y++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1509 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1510 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1511 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1512 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1513 "xor %%"REG_a", %%"REG_a" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
1514 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1515 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1516 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1517 PREFETCH" 32(%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1518 PREFETCH" 32(%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1519 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1520 "movq %%mm0, %%mm2 \n\t" // U(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1521 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1522 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1523 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1524
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1525 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1526 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1527 "movq %%mm3, %%mm4 \n\t" // Y(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1528 "movq %%mm5, %%mm6 \n\t" // Y(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1529 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1530 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1531 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1532 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1533
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1534 MOVNTQ" %%mm3, (%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1535 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1536 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1537 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1538
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1539 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1540 "cmp %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1541 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1542 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1543 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1544 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1545 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1546
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1547 #if defined ARCH_ALPHA && defined HAVE_MVI
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1548 #define pl2yuy2(n) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1549 y1 = yc[n]; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1550 y2 = yc2[n]; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1551 u = uc[n]; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1552 v = vc[n]; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1553 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1554 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1555 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1556 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1557 yuv1 = (u << 8) + (v << 24); \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1558 yuv2 = yuv1 + y2; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1559 yuv1 += y1; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1560 qdst[n] = yuv1; \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1561 qdst2[n] = yuv2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1562
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1563 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1564 uint64_t *qdst = (uint64_t *) dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1565 uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1566 const uint32_t *yc = (uint32_t *) ysrc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1567 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1568 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1569 for(i = 0; i < chromWidth; i += 8){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1570 uint64_t y1, y2, yuv1, yuv2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1571 uint64_t u, v;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1572 /* Prefetch */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1573 asm("ldq $31,64(%0)" :: "r"(yc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1574 asm("ldq $31,64(%0)" :: "r"(yc2));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1575 asm("ldq $31,64(%0)" :: "r"(uc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1576 asm("ldq $31,64(%0)" :: "r"(vc));
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1577
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1578 pl2yuy2(0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1579 pl2yuy2(1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1580 pl2yuy2(2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1581 pl2yuy2(3);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1582
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1583 yc += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1584 yc2 += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1585 uc += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1586 vc += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1587 qdst += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1588 qdst2 += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1589 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1590 y++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1591 ysrc += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1592 dst += dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1593
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1594 #elif __WORDSIZE >= 64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1595 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1596 uint64_t *ldst = (uint64_t *) dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1597 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1598 for(i = 0; i < chromWidth; i += 2){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1599 uint64_t k, l;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1600 k = yc[0] + (uc[0] << 8) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1601 (yc[1] << 16) + (vc[0] << 24);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1602 l = yc[2] + (uc[1] << 8) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1603 (yc[3] << 16) + (vc[1] << 24);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1604 *ldst++ = k + (l << 32);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1605 yc += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1606 uc += 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1607 vc += 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1608 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1609
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1610 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1611 int i, *idst = (int32_t *) dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1612 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1613 for(i = 0; i < chromWidth; i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1614 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1615 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1616 (yc[1] << 8) + (vc[0] << 0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1617 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1618 *idst++ = yc[0] + (uc[0] << 8) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1619 (yc[1] << 16) + (vc[0] << 24);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1620 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1621 yc += 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1622 uc++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1623 vc++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1624 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1625 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1626 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1627 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1628 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1629 usrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1630 vsrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1631 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1632 ysrc += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1633 dst += dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1634 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1635 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1636 asm( EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1637 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1638 :::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1639 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1640 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1641
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1642 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1643 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1644 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1645 * problem for anyone then tell me, and ill fix it)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1646 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1647 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1648 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1649 long lumStride, long chromStride, long dstStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1650 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1651 //FIXME interpolate chroma
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1652 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1653 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1654
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1655 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1656 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1657 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1658 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1659 long y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1660 const long chromWidth= width>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1661 for(y=0; y<height; y++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1662 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1663 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1664 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1665 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1666 "xor %%"REG_a", %%"REG_a" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
1667 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1668 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1669 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1670 PREFETCH" 32(%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1671 PREFETCH" 32(%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1672 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1673 "movq %%mm0, %%mm2 \n\t" // U(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1674 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1675 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1676 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1677
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1678 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1679 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1680 "movq %%mm0, %%mm4 \n\t" // Y(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1681 "movq %%mm2, %%mm6 \n\t" // Y(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1682 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1683 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1684 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1685 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1686
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1687 MOVNTQ" %%mm0, (%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1688 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1689 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1690 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1691
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1692 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1693 "cmp %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1694 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1695 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1696 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1697 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1698 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1699 //FIXME adapt the alpha asm code from yv12->yuy2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1700
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1701 #if __WORDSIZE >= 64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1702 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1703 uint64_t *ldst = (uint64_t *) dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1704 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1705 for(i = 0; i < chromWidth; i += 2){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1706 uint64_t k, l;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1707 k = uc[0] + (yc[0] << 8) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1708 (vc[0] << 16) + (yc[1] << 24);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1709 l = uc[1] + (yc[2] << 8) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1710 (vc[1] << 16) + (yc[3] << 24);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1711 *ldst++ = k + (l << 32);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1712 yc += 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1713 uc += 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1714 vc += 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1715 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1716
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1717 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1718 int i, *idst = (int32_t *) dst;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1719 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1720 for(i = 0; i < chromWidth; i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1721 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1722 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1723 (vc[0] << 8) + (yc[1] << 0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1724 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1725 *idst++ = uc[0] + (yc[0] << 8) +
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1726 (vc[0] << 16) + (yc[1] << 24);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1727 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1728 yc += 2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1729 uc++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1730 vc++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1731 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1732 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1733 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1734 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1735 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1736 usrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1737 vsrc += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1738 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1739 ysrc += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1740 dst += dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1741 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1742 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1743 asm( EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1744 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1745 :::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1746 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1747 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1748
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1749 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1750 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1751 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1752 * problem for anyone then tell me, and ill fix it)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1753 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1754 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1755 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1756 long lumStride, long chromStride, long dstStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1757 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1758 //FIXME interpolate chroma
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1759 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1760 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1761
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1762 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1763 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1764 * width should be a multiple of 16
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1765 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1766 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1767 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1768 long lumStride, long chromStride, long dstStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1769 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1770 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1771 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1772
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1773 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1774 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1775 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1776 * problem for anyone then tell me, and ill fix it)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1777 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1778 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1779 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1780 long lumStride, long chromStride, long srcStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1781 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1782 long y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1783 const long chromWidth= width>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1784 for(y=0; y<height; y+=2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1785 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1786 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1787 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1788 "xor %%"REG_a", %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1789 "pcmpeqw %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1790 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
1791 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1792 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1793 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1794 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1795 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1796 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1797 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1798 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1799 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1800 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1801 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1802 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1803 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1804
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1805 MOVNTQ" %%mm2, (%1, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1806
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1807 "movq 16(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1808 "movq 24(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1809 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1810 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1811 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1812 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1813 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1814 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1815 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1816 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1817
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1818 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1819
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1820 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1821 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1822 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1823 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1824 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1825 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1826 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1827 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1828
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1829 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1830 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1831
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1832 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1833 "cmp %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1834 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1835 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1836 : "memory", "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1837 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1838
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1839 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1840 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1841
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1842 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1843 "xor %%"REG_a", %%"REG_a" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
1844 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1845 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1846 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1847 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1848 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1849 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1850 "movq 24(%0, %%"REG_a", 4), %%mm3\n\t" // YUYV YUYV(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1851 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1852 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1853 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1854 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1855 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1856 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1857
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1858 MOVNTQ" %%mm0, (%1, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1859 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1860
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1861 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1862 "cmp %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1863 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1864
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1865 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1866 : "memory", "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1867 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1868 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1869 long i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1870 for(i=0; i<chromWidth; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1871 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1872 ydst[2*i+0] = src[4*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1873 udst[i] = src[4*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1874 ydst[2*i+1] = src[4*i+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1875 vdst[i] = src[4*i+3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1876 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1877 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1878 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1879
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1880 for(i=0; i<chromWidth; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1881 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1882 ydst[2*i+0] = src[4*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1883 ydst[2*i+1] = src[4*i+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1884 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1885 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1886 udst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1887 vdst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1888 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1889 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1890 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1891 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1892 asm volatile( EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1893 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1894 :::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1895 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1896 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1897
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1898 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1899 uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1900 long width, long height, long lumStride, long chromStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1901 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1902 /* Y Plane */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1903 memcpy(ydst, ysrc, width*height);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1904
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1905 /* XXX: implement upscaling for U,V */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1906 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1907
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1908 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1909 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1910 long x,y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1911
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1912 dst[0]= src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1913
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1914 // first line
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1915 for(x=0; x<srcWidth-1; x++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1916 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1917 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1918 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1919 dst[2*srcWidth-1]= src[srcWidth-1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1920
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1921 dst+= dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1922
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1923 for(y=1; y<srcHeight; y++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1924 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1925 const long mmxSize= srcWidth&~15;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1926 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1927 "mov %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1928 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1929 "movq (%0, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1930 "movq (%1, %%"REG_a"), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1931 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1932 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1933 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1934 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1935 PAVGB" %%mm0, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1936 PAVGB" %%mm0, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1937 PAVGB" %%mm0, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1938 PAVGB" %%mm0, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1939 PAVGB" %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1940 PAVGB" %%mm1, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1941 PAVGB" %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1942 PAVGB" %%mm1, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1943 "movq %%mm5, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1944 "movq %%mm4, %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1945 "punpcklbw %%mm3, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1946 "punpckhbw %%mm3, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1947 "punpcklbw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1948 "punpckhbw %%mm2, %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1949 #if 1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1950 MOVNTQ" %%mm5, (%2, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1951 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1952 MOVNTQ" %%mm4, (%3, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1953 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1954 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1955 "movq %%mm5, (%2, %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1956 "movq %%mm7, 8(%2, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1957 "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1958 "movq %%mm6, 8(%3, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1959 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1960 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1961 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1962 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1963 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1964 "g" (-mmxSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1965 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1966
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1967 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1968 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1969 const long mmxSize=1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1970 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1971 dst[0 ]= (3*src[0] + src[srcStride])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1972 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1973
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1974 for(x=mmxSize-1; x<srcWidth-1; x++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1975 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1976 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1977 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1978 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1979 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1980 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1981 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1982
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1983 dst+=dstStride*2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1984 src+=srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1985 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1986
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1987 // last line
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1988 #if 1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1989 dst[0]= src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1990
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1991 for(x=0; x<srcWidth-1; x++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1992 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1993 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1994 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1995 dst[2*srcWidth-1]= src[srcWidth-1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1996 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1997 for(x=0; x<srcWidth; x++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1998 dst[2*x+0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1999 dst[2*x+1]= src[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2000 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2001 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2002
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2003 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2004 asm volatile( EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2005 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2006 :::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2007 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2008 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2009
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2010 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2011 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2012 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2013 * problem for anyone then tell me, and I'll fix it)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2014 * chrominance data is only taken from every second line; others are ignored. FIXME: write HQ version
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2015 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2016 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2017 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2018 long lumStride, long chromStride, long srcStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2019 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2020 long y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2021 const long chromWidth= width>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2022 for(y=0; y<height; y+=2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2023 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2024 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2025 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2026 "xorl %%eax, %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2027 "pcmpeqw %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2028 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
2029 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2030 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2031 PREFETCH" 64(%0, %%eax, 4) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2032 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2033 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2034 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2035 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2036 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2037 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2038 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2039 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2040 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2041 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2042
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2043 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2044
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2045 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2046 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2047 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2048 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2049 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2050 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2051 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2052 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2053 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2054 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2055
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2056 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2057
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2058 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2059 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2060 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2061 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2062 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2063 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2064 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2065 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2066
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2067 MOVNTQ" %%mm0, (%3, %%eax) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2068 MOVNTQ" %%mm2, (%2, %%eax) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2069
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2070 "addl $8, %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2071 "cmpl %4, %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2072 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2073 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2074 : "memory", "%eax"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2075 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2076
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2077 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2078 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2079
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2080 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2081 "xorl %%eax, %%eax \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
2082 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2083 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2084 PREFETCH" 64(%0, %%eax, 4) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2085 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2086 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2087 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2088 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // UYVY UYVY(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2089 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2090 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2091 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2092 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2093 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2094 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2095
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2096 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2097 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2098
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2099 "addl $8, %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2100 "cmpl %4, %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2101 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2102
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2103 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2104 : "memory", "%eax"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2105 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2106 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2107 long i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2108 for(i=0; i<chromWidth; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2109 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2110 udst[i] = src[4*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2111 ydst[2*i+0] = src[4*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2112 vdst[i] = src[4*i+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2113 ydst[2*i+1] = src[4*i+3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2114 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2115 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2116 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2117
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2118 for(i=0; i<chromWidth; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2119 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2120 ydst[2*i+0] = src[4*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2121 ydst[2*i+1] = src[4*i+3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2122 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2123 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2124 udst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2125 vdst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2126 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2127 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2128 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2129 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2130 asm volatile( EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2131 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2132 :::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2133 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2134 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2135
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2136 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2137 *
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2138 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2139 * problem for anyone then tell me, and I'll fix it)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2140 * chrominance data is only taken from every second line; others are ignored in the C version. FIXME: write HQ version
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2141 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2142 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2143 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2144 long lumStride, long chromStride, long srcStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2145 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2146 long y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2147 const long chromWidth= width>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2148 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2149 for(y=0; y<height-2; y+=2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2150 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2151 long i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2152 for(i=0; i<2; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2153 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2154 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2155 "mov %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2156 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2157 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2158 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2159 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
2160 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2161 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2162 PREFETCH" 64(%0, %%"REG_d") \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2163 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2164 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2165 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2166 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2167 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2168 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2169 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2170 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2171 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2172 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2173 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2174 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2175 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2176 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2177 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2178 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2179 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2180 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2181 "packssdw %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2182 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2183 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2184 "pmaddwd %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2185 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2186 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2187
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2188 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2189 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2190 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2191 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2192 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2193 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2194 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2195 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2196 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2197 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2198 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2199 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2200 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2201 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2202 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2203 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2204 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2205 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2206 "packssdw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2207 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2208 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2209 "pmaddwd %%mm5, %%mm2 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2210 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2211 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2212 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2213
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2214 "packuswb %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2215 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2216
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2217 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2218 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2219 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2220 : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2221 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2222 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2223 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2224 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2225 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2226 src -= srcStride*2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2227 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2228 "mov %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2229 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2230 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2231 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2232 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2233 "add %%"REG_d", %%"REG_d" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18861
diff changeset
2234 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2235 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2236 PREFETCH" 64(%0, %%"REG_d") \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2237 PREFETCH" 64(%1, %%"REG_d") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2238 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2239 "movq (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2240 "movq (%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2241 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2242 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2243 PAVGB" %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2244 PAVGB" %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2245 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2246 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2247 "psrlq $24, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2248 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2249 PAVGB" %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2250 PAVGB" %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2251 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2252 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2253 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2254 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2255 "movd (%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2256 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2257 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2258 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2259 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2260 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2261 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2262 "paddw %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2263 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2264 "paddw %%mm2, %%mm0 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2265 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2266 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2267 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2268 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2269 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2270 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2271 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2272 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2273 "paddw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2274 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2275 "paddw %%mm4, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2276 "psrlw $2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2277 "psrlw $2, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2278 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2279 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2280 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2281
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2282 "pmaddwd %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2283 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2284 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2285 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2286 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2287 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2288 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2289 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2290 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2291 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2292 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2293 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2294 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2295 "pmaddwd %%mm5, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2296 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2297 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2298
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2299 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2300 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2301 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2302 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2303 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2304 PAVGB" %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2305 PAVGB" %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2306 "movq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2307 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2308 "psrlq $24, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2309 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2310 PAVGB" %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2311 PAVGB" %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2312 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2313 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2314 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2315 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2316 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2317 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2318 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2319 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2320 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2321 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2322 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2323 "paddw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2324 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2325 "paddw %%mm2, %%mm4 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2326 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2327 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2328 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2329 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2330 "punpcklbw %%mm7, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2331 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2332 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2333 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2334 "paddw %%mm1, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2335 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2336 "paddw %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2337 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2338 "psrlw $2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2339 "psrlw $2, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2340 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2341 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2342 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2343
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2344 "pmaddwd %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2345 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2346 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2347 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2348 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2349 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2350 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2351 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2352 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2353 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2354 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2355 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2356 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2357 "pmaddwd %%mm5, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2358 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2359 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2360 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2361
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2362 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2363 "punpckldq %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2364 "punpckhdq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2365 "packsswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2366 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2367 "movd %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2368 "punpckhdq %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2369 "movd %%mm0, (%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2370 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2371 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2372 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2373 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2374 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2375
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2376 udst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2377 vdst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2378 src += srcStride*2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2379 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2380
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2381 asm volatile( EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2382 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2383 :::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2384 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2385 y=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2386 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2387 for(; y<height; y+=2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2388 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2389 long i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2390 for(i=0; i<chromWidth; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2391 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2392 unsigned int b= src[6*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2393 unsigned int g= src[6*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2394 unsigned int r= src[6*i+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2395
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2396 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2397 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2398 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2399
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2400 udst[i] = U;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2401 vdst[i] = V;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2402 ydst[2*i] = Y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2403
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2404 b= src[6*i+3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2405 g= src[6*i+4];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2406 r= src[6*i+5];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2407
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2408 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2409 ydst[2*i+1] = Y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2410 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2411 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2412 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2413
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2414 for(i=0; i<chromWidth; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2415 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2416 unsigned int b= src[6*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2417 unsigned int g= src[6*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2418 unsigned int r= src[6*i+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2419
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2420 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2421
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2422 ydst[2*i] = Y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2423
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2424 b= src[6*i+3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2425 g= src[6*i+4];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2426 r= src[6*i+5];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2427
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2428 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2429 ydst[2*i+1] = Y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2430 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2431 udst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2432 vdst += chromStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2433 ydst += lumStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2434 src += srcStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2435 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2436 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2437
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2438 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2439 long width, long height, long src1Stride,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2440 long src2Stride, long dstStride){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2441 long h;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2442
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2443 for(h=0; h < height; h++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2444 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2445 long w;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2446
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2447 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2448 #ifdef HAVE_SSE2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2449 asm(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2450 "xor %%"REG_a", %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2451 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2452 PREFETCH" 64(%1, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2453 PREFETCH" 64(%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2454 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2455 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2456 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2457 "punpcklbw %%xmm2, %%xmm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2458 "punpckhbw %%xmm2, %%xmm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2459 "movntdq %%xmm0, (%0, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2460 "movntdq %%xmm1, 16(%0, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2461 "add $16, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2462 "cmp %3, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2463 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2464 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2465 : "memory", "%"REG_a""
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2466 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2467 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2468 asm(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2469 "xor %%"REG_a", %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2470 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2471 PREFETCH" 64(%1, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2472 PREFETCH" 64(%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2473 "movq (%1, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2474 "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2475 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2476 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2477 "movq (%2, %%"REG_a"), %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2478 "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2479 "punpcklbw %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2480 "punpckhbw %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2481 "punpcklbw %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2482 "punpckhbw %%mm5, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2483 MOVNTQ" %%mm0, (%0, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2484 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2485 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2486 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2487 "add $16, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2488 "cmp %3, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2489 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2490 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2491 : "memory", "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2492 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2493 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2494 for(w= (width&(~15)); w < width; w++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2495 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2496 dest[2*w+0] = src1[w];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2497 dest[2*w+1] = src2[w];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2498 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2499 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2500 for(w=0; w < width; w++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2501 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2502 dest[2*w+0] = src1[w];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2503 dest[2*w+1] = src2[w];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2504 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2505 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2506 dest += dstStride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2507 src1 += src1Stride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2508 src2 += src2Stride;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2509 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2510 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2511 asm(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2512 EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2513 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2514 ::: "memory"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2515 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2516 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2517 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2518
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2519 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2520 uint8_t *dst1, uint8_t *dst2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2521 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2522 long srcStride1, long srcStride2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2523 long dstStride1, long dstStride2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2524 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2525 long y,x,w,h;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2526 w=width/2; h=height/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2527 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2528 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2529 PREFETCH" %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2530 PREFETCH" %1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2531 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2532 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2533 for(y=0;y<h;y++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2534 const uint8_t* s1=src1+srcStride1*(y>>1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2535 uint8_t* d=dst1+dstStride1*y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2536 x=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2537 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2538 for(;x<w-31;x+=32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2539 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2540 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2541 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2542 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2543 "movq 8%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2544 "movq 16%1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2545 "movq 24%1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2546 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2547 "movq %%mm2, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2548 "movq %%mm4, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2549 "movq %%mm6, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2550 "punpcklbw %%mm0, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2551 "punpckhbw %%mm1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2552 "punpcklbw %%mm2, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2553 "punpckhbw %%mm3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2554 "punpcklbw %%mm4, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2555 "punpckhbw %%mm5, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2556 "punpcklbw %%mm6, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2557 "punpckhbw %%mm7, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2558 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2559 MOVNTQ" %%mm1, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2560 MOVNTQ" %%mm2, 16%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2561 MOVNTQ" %%mm3, 24%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2562 MOVNTQ" %%mm4, 32%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2563 MOVNTQ" %%mm5, 40%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2564 MOVNTQ" %%mm6, 48%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2565 MOVNTQ" %%mm7, 56%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2566 :"=m"(d[2*x])
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2567 :"m"(s1[x])
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2568 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2569 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2570 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2571 for(;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2572 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2573 for(y=0;y<h;y++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2574 const uint8_t* s2=src2+srcStride2*(y>>1);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2575 uint8_t* d=dst2+dstStride2*y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2576 x=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2577 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2578 for(;x<w-31;x+=32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2579 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2580 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2581 PREFETCH" 32%1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2582 "movq %1, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2583 "movq 8%1, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2584 "movq 16%1, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2585 "movq 24%1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2586 "movq %%mm0, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2587 "movq %%mm2, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2588 "movq %%mm4, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2589 "movq %%mm6, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2590 "punpcklbw %%mm0, %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2591 "punpckhbw %%mm1, %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2592 "punpcklbw %%mm2, %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2593 "punpckhbw %%mm3, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2594 "punpcklbw %%mm4, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2595 "punpckhbw %%mm5, %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2596 "punpcklbw %%mm6, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2597 "punpckhbw %%mm7, %%mm7\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2598 MOVNTQ" %%mm0, %0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2599 MOVNTQ" %%mm1, 8%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2600 MOVNTQ" %%mm2, 16%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2601 MOVNTQ" %%mm3, 24%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2602 MOVNTQ" %%mm4, 32%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2603 MOVNTQ" %%mm5, 40%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2604 MOVNTQ" %%mm6, 48%0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2605 MOVNTQ" %%mm7, 56%0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2606 :"=m"(d[2*x])
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2607 :"m"(s2[x])
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2608 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2609 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2610 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2611 for(;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2612 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2613 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2614 asm(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2615 EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2616 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2617 ::: "memory"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2618 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2619 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2620 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2621
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2622 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2623 uint8_t *dst,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2624 long width, long height,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2625 long srcStride1, long srcStride2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2626 long srcStride3, long dstStride)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2627 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2628 long y,x,w,h;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2629 w=width/2; h=height;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2630 for(y=0;y<h;y++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2631 const uint8_t* yp=src1+srcStride1*y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2632 const uint8_t* up=src2+srcStride2*(y>>2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2633 const uint8_t* vp=src3+srcStride3*(y>>2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2634 uint8_t* d=dst+dstStride*y;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2635 x=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2636 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2637 for(;x<w-7;x+=8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2638 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2639 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2640 PREFETCH" 32(%1, %0)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2641 PREFETCH" 32(%2, %0)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2642 PREFETCH" 32(%3, %0)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2643 "movq (%1, %0, 4), %%mm0\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2644 "movq (%2, %0), %%mm1\n\t" /* U0U1U2U3U4U5U6U7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2645 "movq (%3, %0), %%mm2\n\t" /* V0V1V2V3V4V5V6V7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2646 "movq %%mm0, %%mm3\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2647 "movq %%mm1, %%mm4\n\t" /* U0U1U2U3U4U5U6U7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2648 "movq %%mm2, %%mm5\n\t" /* V0V1V2V3V4V5V6V7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2649 "punpcklbw %%mm1, %%mm1\n\t" /* U0U0 U1U1 U2U2 U3U3 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2650 "punpcklbw %%mm2, %%mm2\n\t" /* V0V0 V1V1 V2V2 V3V3 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2651 "punpckhbw %%mm4, %%mm4\n\t" /* U4U4 U5U5 U6U6 U7U7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2652 "punpckhbw %%mm5, %%mm5\n\t" /* V4V4 V5V5 V6V6 V7V7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2653
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2654 "movq %%mm1, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2655 "punpcklbw %%mm2, %%mm1\n\t" /* U0V0 U0V0 U1V1 U1V1*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2656 "punpcklbw %%mm1, %%mm0\n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2657 "punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2658 MOVNTQ" %%mm0, (%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2659 MOVNTQ" %%mm3, 8(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2660
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2661 "punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2662 "movq 8(%1, %0, 4), %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2663 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2664 "punpcklbw %%mm6, %%mm0\n\t" /* Y U2 Y V2 Y U2 Y V2*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2665 "punpckhbw %%mm6, %%mm3\n\t" /* Y U3 Y V3 Y U3 Y V3*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2666 MOVNTQ" %%mm0, 16(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2667 MOVNTQ" %%mm3, 24(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2668
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2669 "movq %%mm4, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2670 "movq 16(%1, %0, 4), %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2671 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2672 "punpcklbw %%mm5, %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2673 "punpcklbw %%mm4, %%mm0\n\t" /* Y U4 Y V4 Y U4 Y V4*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2674 "punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2675 MOVNTQ" %%mm0, 32(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2676 MOVNTQ" %%mm3, 40(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2677
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2678 "punpckhbw %%mm5, %%mm6\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2679 "movq 24(%1, %0, 4), %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2680 "movq %%mm0, %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2681 "punpcklbw %%mm6, %%mm0\n\t" /* Y U6 Y V6 Y U6 Y V6*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2682 "punpckhbw %%mm6, %%mm3\n\t" /* Y U7 Y V7 Y U7 Y V7*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2683 MOVNTQ" %%mm0, 48(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2684 MOVNTQ" %%mm3, 56(%4, %0, 8)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2685
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2686 : "+r" (x)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2687 : "r"(yp), "r" (up), "r"(vp), "r"(d)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2688 :"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2689 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2690 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2691 for(; x<w; x++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2692 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2693 const long x2= x<<2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2694 d[8*x+0]=yp[x2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2695 d[8*x+1]=up[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2696 d[8*x+2]=yp[x2+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2697 d[8*x+3]=vp[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2698 d[8*x+4]=yp[x2+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2699 d[8*x+5]=up[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2700 d[8*x+6]=yp[x2+3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2701 d[8*x+7]=vp[x];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2702 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2703 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2704 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2705 asm(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2706 EMMS" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2707 SFENCE" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2708 ::: "memory"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2709 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2710 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2711 }
22960
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2712
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2713 static inline void RENAME(rgb2rgb_init)(void){
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2714 rgb15to16= RENAME(rgb15to16);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2715 rgb15to24= RENAME(rgb15to24);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2716 rgb15to32= RENAME(rgb15to32);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2717 rgb16to24= RENAME(rgb16to24);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2718 rgb16to32= RENAME(rgb16to32);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2719 rgb16to15= RENAME(rgb16to15);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2720 rgb24to16= RENAME(rgb24to16);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2721 rgb24to15= RENAME(rgb24to15);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2722 rgb24to32= RENAME(rgb24to32);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2723 rgb32to16= RENAME(rgb32to16);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2724 rgb32to15= RENAME(rgb32to15);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2725 rgb32to24= RENAME(rgb32to24);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2726 rgb24tobgr15= RENAME(rgb24tobgr15);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2727 rgb24tobgr16= RENAME(rgb24tobgr16);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2728 rgb24tobgr24= RENAME(rgb24tobgr24);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2729 rgb32tobgr32= RENAME(rgb32tobgr32);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2730 rgb32tobgr16= RENAME(rgb32tobgr16);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2731 rgb32tobgr15= RENAME(rgb32tobgr15);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2732 yv12toyuy2= RENAME(yv12toyuy2);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2733 yv12touyvy= RENAME(yv12touyvy);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2734 yuv422ptoyuy2= RENAME(yuv422ptoyuy2);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2735 yuy2toyv12= RENAME(yuy2toyv12);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2736 // uyvytoyv12= RENAME(uyvytoyv12);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2737 // yvu9toyv12= RENAME(yvu9toyv12);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2738 planar2x= RENAME(planar2x);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2739 rgb24toyv12= RENAME(rgb24toyv12);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2740 interleaveBytes= RENAME(interleaveBytes);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2741 vu9_to_vu12= RENAME(vu9_to_vu12);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2742 yvu9_to_yuy2= RENAME(yvu9_to_yuy2);
2d1ad4285df4 Remove code duplication in sws_rgb2rgb_init.
ivo
parents: 21029
diff changeset
2743 }