Mercurial > mplayer.hg
annotate libswscale/rgb2rgb_template.c @ 30917:4ccce86f5c96
Remove unused MACOSX_BUNDLE config.mak entry.
author | diego |
---|---|
date | Thu, 25 Mar 2010 18:53:37 +0000 |
parents | 1032ff2e83f1 |
children | 0be6ed163321 |
rev | line source |
---|---|
18861 | 1 /* |
27158 | 2 * software RGB to RGB converter |
3 * pluralize by software PAL8 to RGB converter | |
4 * software YUV to YUV converter | |
5 * software YUV to RGB converter | |
6 * Written by Nick Kurshev. | |
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |
8 * lot of big-endian byte order fixes by Alex Beregszaszi | |
19703
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
9 * |
20094
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
10 * This file is part of FFmpeg. |
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
11 * |
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
12 * FFmpeg is free software; you can redistribute it and/or modify |
19703
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
13 * it under the terms of the GNU General Public License as published by |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
14 * the Free Software Foundation; either version 2 of the License, or |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
15 * (at your option) any later version. |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
16 * |
20094
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
17 * FFmpeg is distributed in the hope that it will be useful, |
19703
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
20 * GNU General Public License for more details. |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
21 * |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
22 * You should have received a copy of the GNU General Public License |
20094
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
23 * along with FFmpeg; if not, write to the Free Software |
23702 | 24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
23129 | 25 * |
27158 | 26 * The C code (not assembly, MMX, ...) of this file can be used |
25109 | 27 * under the LGPL license. |
18861 | 28 */ |
29 | |
30 #include <stddef.h> | |
31 | |
/*
 * Per-instantiation instruction-name macros used by the inline assembly in
 * this template. They are #undef'd first so that they can be redefined each
 * time the template is processed with a different set of HAVE_* settings.
 */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PAVGB

#if HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

#if HAVE_AMD3DNOW
#define PREFETCH "prefetch"
#define PAVGB "pavgusb"
#elif HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PAVGB "pavgb"
#else
/* No prefetch instruction selected: expand to an assembler comment.
 * PAVGB is intentionally left undefined in this case. */
#define PREFETCH " # nop"
#endif

#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif

#if HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
/* Without MMX2, fall back to a plain movq store and an empty fence. */
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
69 | |
/**
 * Expand packed 3-byte pixels into 4-byte pixels, filling the added byte
 * with 255.
 *
 * On little-endian builds each output pixel is the three source bytes in
 * their original order followed by 255. On big-endian builds
 * (HAVE_BIGENDIAN) the output is 255 followed by the three source bytes in
 * reverse order.
 *
 * @param src      source buffer
 * @param dst      destination buffer; 4 bytes are written for every 3 bytes
 *                 read
 * @param src_size number of bytes to read from src; each step consumes 3
 *                 bytes, so this is expected to be a multiple of 3
 */
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* Each MMX iteration turns 24 source bytes into 32 destination bytes. */
    mm_end = end - 23;
    /* mm7 holds mask32a, which is ORed into every output quadword
     * (presumably the alpha-byte mask; defined outside this block). */
    __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
    /* NOTE(review): the 32-bit load at offset 21 appears to touch source
     * byte 24, one past the 24-byte group -- confirm callers tolerate it. */
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "punpckldq 3%1, %%mm0 \n\t"
            "movd 6%1, %%mm1 \n\t"
            "punpckldq 9%1, %%mm1 \n\t"
            "movd 12%1, %%mm2 \n\t"
            "punpckldq 15%1, %%mm2 \n\t"
            "movd 18%1, %%mm3 \n\t"
            "punpckldq 21%1, %%mm3 \n\t"
            "por %%mm7, %%mm0 \n\t"
            "por %%mm7, %%mm1 \n\t"
            "por %%mm7, %%mm2 \n\t"
            "por %%mm7, %%mm3 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm2, 16%0 \n\t"
            MOVNTQ" %%mm3, 24%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 32;
        s += 24;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* Scalar loop: handles everything when MMX is disabled, otherwise the
     * bytes left over after the MMX loop. */
    while (s < end) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
        *dest++ = 255;
        *dest++ = s[2];
        *dest++ = s[1];
        *dest++ = s[0];
        s+=3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 255;
#endif
    }
}
127 | |
/**
 * Pack 4-byte pixels into 3-byte pixels by dropping one byte per pixel.
 *
 * On little-endian builds the first three bytes of each source pixel are
 * copied in order and the fourth is skipped. On big-endian builds
 * (HAVE_BIGENDIAN) the first byte is skipped and the remaining three are
 * written in reverse order.
 *
 * @param src      source buffer
 * @param dst      destination buffer; 3 bytes are written for every 4 bytes
 *                 read
 * @param src_size number of bytes to read from src; each step consumes 4
 *                 bytes, so this is expected to be a multiple of 4
 */
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* Each MMX iteration turns 32 source bytes into 24 destination bytes. */
    mm_end = end - 31;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 16%1, %%mm4 \n\t"
            "movq 24%1, %%mm5 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm3 \n\t"
            "psrlq $8, %%mm6 \n\t"
            "psrlq $8, %%mm7 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm1 \n\t"
            "pand %2, %%mm4 \n\t"
            "pand %2, %%mm5 \n\t"
            "pand %3, %%mm2 \n\t"
            "pand %3, %%mm3 \n\t"
            "pand %3, %%mm6 \n\t"
            "pand %3, %%mm7 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "por %%mm6, %%mm4 \n\t"
            "por %%mm7, %%mm5 \n\t"

            /* Merge the four partially packed quadwords into three. */
            "movq %%mm1, %%mm2 \n\t"
            "movq %%mm4, %%mm3 \n\t"
            "psllq $48, %%mm2 \n\t"
            "psllq $32, %%mm3 \n\t"
            "pand %4, %%mm2 \n\t"
            "pand %5, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psrlq $16, %%mm1 \n\t"
            "psrlq $32, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "pand %6, %%mm5 \n\t"
            "por %%mm5, %%mm4 \n\t"

            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm4, 16%0"
            :"=m"(*dest)
            :"m"(*s),"m"(mask24l),
             "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
            :"memory");
        dest += 24;
        s += 32;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* Scalar loop: handles everything when MMX is disabled, otherwise the
     * bytes left over after the MMX loop. */
    while (s < end) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
        s++;
        dest[2] = *s++;
        dest[1] = *s++;
        dest[0] = *s++;
        dest += 3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        s++;
#endif
    }
}
211 | |
212 /* | |
27158 | 213 original by Strepto/Astral |
214 ported to gcc & bugfixed: A'rpi | |
18861 | 215 MMX2, 3DNOW optimization by Nick Kurshev |
27158 | 216 32-bit C version, and and&add trick by Michael Niedermayer |
18861 | 217 */ |
/**
 * Convert 16-bit words from the 15 bpp layout to the 16 bpp layout.
 *
 * In every 16-bit word, bits 0-4 are kept in place and bits 5-14 are moved
 * up by one position (to bits 6-15); bit 5 of the result is always 0.
 * This is done with the "and & add" trick: adding the masked upper bits to
 * the value doubles them, which is the same as shifting them left by one.
 *
 * @param src      source buffer, read 32 or 16 bits at a time through casts
 *                 (so suitable alignment is assumed on strict platforms)
 * @param dst      destination buffer, same size as the source
 * @param src_size number of bytes to convert; expected to be a multiple of 2
 */
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    register const uint8_t *s = src;
    register uint8_t *d = dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s));
    __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
    /* Each MMX iteration converts 16 bytes (8 words). */
    mm_end = end - 15;
    while (s<mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "pand %%mm4, %%mm0 \n\t"
            "pand %%mm4, %%mm2 \n\t"
            "paddw %%mm1, %%mm0 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            /* The asm stores 16 bytes but only *d is listed as an output,
             * so tell the compiler that memory is modified. */
            :"memory"
        );
        d+=16;
        s+=16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* Two words at a time through a 32-bit access. */
    mm_end = end - 3;
    while (s < mm_end) {
        register uint32_t x = *((const uint32_t *)s);
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d+=4;
        s+=4;
    }
    /* At most one word is left at this point. */
    if (s < end) {
        register unsigned short x = *((const uint16_t *)s);
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}
263 | |
/**
 * Convert 16-bit words from the 16 bpp layout to the 15 bpp layout.
 *
 * In every 16-bit word, bits 0-4 are kept in place and bits 6-15 are moved
 * down by one position (to bits 5-14); the original bit 5 is discarded and
 * bit 15 of the result is always 0.
 *
 * @param src      source buffer, read 32 or 16 bits at a time through casts
 *                 (so suitable alignment is assumed on strict platforms)
 * @param dst      destination buffer, same size as the source
 * @param src_size number of bytes to convert; expected to be a multiple of 2
 */
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    register const uint8_t *s = src;
    register uint8_t *d = dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s));
    __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
    /* Each MMX iteration converts 16 bytes (8 words). */
    mm_end = end - 15;
    while (s<mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $1, %%mm0 \n\t"
            "psrlq $1, %%mm2 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm3 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            /* The asm stores 16 bytes but only *d is listed as an output,
             * so tell the compiler that memory is modified. */
            :"memory"
        );
        d+=16;
        s+=16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* Two words at a time through a 32-bit access. */
    mm_end = end - 3;
    while (s < mm_end) {
        register uint32_t x = *((const uint32_t*)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s+=4;
        d+=4;
    }
    /* At most one word is left at this point. */
    if (s < end) {
        register uint16_t x = *((const uint16_t*)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
    }
}
314 | |
/**
 * Reduce 32-bit pixels to 16-bit pixels.
 *
 * For each 32-bit source value the scalar path produces a 16-bit value with
 *   - bits  0-4  taken from bits  3-7  of the source,
 *   - bits  5-10 taken from bits 10-15 of the source,
 *   - bits 11-15 taken from bits 19-23 of the source;
 * the top byte of the source value is ignored.
 *
 * @param src      source buffer, read as 32-bit values
 * @param dst      destination buffer, written as 16-bit values
 * @param src_size number of bytes to read from src; expected to be a
 *                 multiple of 4
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    /* Each MMX iteration converts 16 source bytes (4 pixels). */
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    __asm__ volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $5, %%mm0 \n\t"
        "pslld $11, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
        /* The loop reads and writes memory through the register pointers,
         * which the operand list alone does not tell the compiler. */
        : "memory"
    );
#else
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* Scalar loop: handles everything when MMX is disabled, otherwise the
     * pixels left over after the MMX loop. */
    while (s < end) {
        /* Kept unsigned so a set top byte never goes through a
         * uint32_t -> int conversion. */
        register uint32_t rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
}
410 | |
/**
 * Reduce 32-bit pixels to 16-bit pixels with the outer two channels swapped
 * relative to rgb32to16.
 *
 * For each 32-bit source value the scalar path produces a 16-bit value with
 *   - bits 11-15 taken from bits  3-7  of the source,
 *   - bits  5-10 taken from bits 10-15 of the source,
 *   - bits  0-4  taken from bits 19-23 of the source;
 * the top byte of the source value is ignored.
 *
 * @param src      source buffer, read as 32-bit values
 * @param dst      destination buffer, written as 16-bit values
 * @param src_size number of bytes to read from src; expected to be a
 *                 multiple of 4
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* Each MMX iteration converts 16 source bytes (4 pixels). */
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* Scalar loop: handles everything when MMX is disabled, otherwise the
     * pixels left over after the MMX loop. */
    while (s < end) {
        /* Kept unsigned so a set top byte never goes through a
         * uint32_t -> int conversion. */
        register uint32_t rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
    }
}
469 | |
470 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) | |
471 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
472 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
473 const uint8_t *end; |
28276 | 474 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
475 const uint8_t *mm_end; |
18861 | 476 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
477 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
478 end = s + src_size; |
28276 | 479 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
480 mm_end = end - 15; |
25109 | 481 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) |
27744 | 482 __asm__ volatile( |
29480 | 483 "movq %3, %%mm5 \n\t" |
484 "movq %4, %%mm6 \n\t" | |
485 "movq %5, %%mm7 \n\t" | |
486 "jmp 2f \n\t" | |
487 ASMALIGN(4) | |
488 "1: \n\t" | |
489 PREFETCH" 32(%1) \n\t" | |
490 "movd (%1), %%mm0 \n\t" | |
491 "movd 4(%1), %%mm3 \n\t" | |
492 "punpckldq 8(%1), %%mm0 \n\t" | |
493 "punpckldq 12(%1), %%mm3 \n\t" | |
494 "movq %%mm0, %%mm1 \n\t" | |
495 "movq %%mm3, %%mm4 \n\t" | |
496 "pand %%mm6, %%mm0 \n\t" | |
497 "pand %%mm6, %%mm3 \n\t" | |
498 "pmaddwd %%mm7, %%mm0 \n\t" | |
499 "pmaddwd %%mm7, %%mm3 \n\t" | |
500 "pand %%mm5, %%mm1 \n\t" | |
501 "pand %%mm5, %%mm4 \n\t" | |
502 "por %%mm1, %%mm0 \n\t" | |
503 "por %%mm4, %%mm3 \n\t" | |
504 "psrld $6, %%mm0 \n\t" | |
505 "pslld $10, %%mm3 \n\t" | |
506 "por %%mm3, %%mm0 \n\t" | |
507 MOVNTQ" %%mm0, (%0) \n\t" | |
508 "add $16, %1 \n\t" | |
509 "add $8, %0 \n\t" | |
510 "2: \n\t" | |
511 "cmp %2, %1 \n\t" | |
512 " jb 1b \n\t" | |
513 : "+r" (d), "+r"(s) | |
514 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
515 ); |
18861 | 516 #else |
27744 | 517 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
518 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
519 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
520 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
521 ::"m"(red_15mask),"m"(green_15mask)); |
29481 | 522 while (s < mm_end) { |
27744 | 523 __asm__ volatile( |
29480 | 524 PREFETCH" 32%1 \n\t" |
525 "movd %1, %%mm0 \n\t" | |
526 "movd 4%1, %%mm3 \n\t" | |
527 "punpckldq 8%1, %%mm0 \n\t" | |
528 "punpckldq 12%1, %%mm3 \n\t" | |
529 "movq %%mm0, %%mm1 \n\t" | |
530 "movq %%mm0, %%mm2 \n\t" | |
531 "movq %%mm3, %%mm4 \n\t" | |
532 "movq %%mm3, %%mm5 \n\t" | |
533 "psrlq $3, %%mm0 \n\t" | |
534 "psrlq $3, %%mm3 \n\t" | |
535 "pand %2, %%mm0 \n\t" | |
536 "pand %2, %%mm3 \n\t" | |
537 "psrlq $6, %%mm1 \n\t" | |
538 "psrlq $6, %%mm4 \n\t" | |
539 "pand %%mm6, %%mm1 \n\t" | |
540 "pand %%mm6, %%mm4 \n\t" | |
541 "psrlq $9, %%mm2 \n\t" | |
542 "psrlq $9, %%mm5 \n\t" | |
543 "pand %%mm7, %%mm2 \n\t" | |
544 "pand %%mm7, %%mm5 \n\t" | |
545 "por %%mm1, %%mm0 \n\t" | |
546 "por %%mm4, %%mm3 \n\t" | |
547 "por %%mm2, %%mm0 \n\t" | |
548 "por %%mm5, %%mm3 \n\t" | |
549 "psllq $16, %%mm3 \n\t" | |
550 "por %%mm3, %%mm0 \n\t" | |
551 MOVNTQ" %%mm0, %0 \n\t" | |
552 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
553 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
554 s += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
555 } |
18861 | 556 #endif |
27744 | 557 __asm__ volatile(SFENCE:::"memory"); |
558 __asm__ volatile(EMMS:::"memory"); | |
18861 | 559 #endif |
29481 | 560 while (s < end) { |
26910 | 561 register int rgb = *(const uint32_t*)s; s += 4; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
562 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
563 } |
18861 | 564 } |
565 | |
566 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) | |
567 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
568 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
569 const uint8_t *end; |
28276 | 570 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
571 const uint8_t *mm_end; |
18861 | 572 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
573 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
574 end = s + src_size; |
28276 | 575 #if HAVE_MMX |
27744 | 576 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
577 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
578 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
579 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
580 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
581 mm_end = end - 15; |
29481 | 582 while (s < mm_end) { |
27744 | 583 __asm__ volatile( |
29480 | 584 PREFETCH" 32%1 \n\t" |
585 "movd %1, %%mm0 \n\t" | |
586 "movd 4%1, %%mm3 \n\t" | |
587 "punpckldq 8%1, %%mm0 \n\t" | |
588 "punpckldq 12%1, %%mm3 \n\t" | |
589 "movq %%mm0, %%mm1 \n\t" | |
590 "movq %%mm0, %%mm2 \n\t" | |
591 "movq %%mm3, %%mm4 \n\t" | |
592 "movq %%mm3, %%mm5 \n\t" | |
593 "psllq $7, %%mm0 \n\t" | |
594 "psllq $7, %%mm3 \n\t" | |
595 "pand %%mm7, %%mm0 \n\t" | |
596 "pand %%mm7, %%mm3 \n\t" | |
597 "psrlq $6, %%mm1 \n\t" | |
598 "psrlq $6, %%mm4 \n\t" | |
599 "pand %%mm6, %%mm1 \n\t" | |
600 "pand %%mm6, %%mm4 \n\t" | |
601 "psrlq $19, %%mm2 \n\t" | |
602 "psrlq $19, %%mm5 \n\t" | |
603 "pand %2, %%mm2 \n\t" | |
604 "pand %2, %%mm5 \n\t" | |
605 "por %%mm1, %%mm0 \n\t" | |
606 "por %%mm4, %%mm3 \n\t" | |
607 "por %%mm2, %%mm0 \n\t" | |
608 "por %%mm5, %%mm3 \n\t" | |
609 "psllq $16, %%mm3 \n\t" | |
610 "por %%mm3, %%mm0 \n\t" | |
611 MOVNTQ" %%mm0, %0 \n\t" | |
612 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
613 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
614 s += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
615 } |
27744 | 616 __asm__ volatile(SFENCE:::"memory"); |
617 __asm__ volatile(EMMS:::"memory"); | |
18861 | 618 #endif |
29481 | 619 while (s < end) { |
26910 | 620 register int rgb = *(const uint32_t*)s; s += 4; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
621 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
622 } |
18861 | 623 } |
624 | |
27486 | 625 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 626 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
627 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
628 const uint8_t *end; |
28276 | 629 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
630 const uint8_t *mm_end; |
18861 | 631 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
632 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
633 end = s + src_size; |
28276 | 634 #if HAVE_MMX |
27744 | 635 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
636 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
637 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
638 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
639 ::"m"(red_16mask),"m"(green_16mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
640 mm_end = end - 11; |
29481 | 641 while (s < mm_end) { |
27744 | 642 __asm__ volatile( |
29480 | 643 PREFETCH" 32%1 \n\t" |
644 "movd %1, %%mm0 \n\t" | |
645 "movd 3%1, %%mm3 \n\t" | |
646 "punpckldq 6%1, %%mm0 \n\t" | |
647 "punpckldq 9%1, %%mm3 \n\t" | |
648 "movq %%mm0, %%mm1 \n\t" | |
649 "movq %%mm0, %%mm2 \n\t" | |
650 "movq %%mm3, %%mm4 \n\t" | |
651 "movq %%mm3, %%mm5 \n\t" | |
652 "psrlq $3, %%mm0 \n\t" | |
653 "psrlq $3, %%mm3 \n\t" | |
654 "pand %2, %%mm0 \n\t" | |
655 "pand %2, %%mm3 \n\t" | |
656 "psrlq $5, %%mm1 \n\t" | |
657 "psrlq $5, %%mm4 \n\t" | |
658 "pand %%mm6, %%mm1 \n\t" | |
659 "pand %%mm6, %%mm4 \n\t" | |
660 "psrlq $8, %%mm2 \n\t" | |
661 "psrlq $8, %%mm5 \n\t" | |
662 "pand %%mm7, %%mm2 \n\t" | |
663 "pand %%mm7, %%mm5 \n\t" | |
664 "por %%mm1, %%mm0 \n\t" | |
665 "por %%mm4, %%mm3 \n\t" | |
666 "por %%mm2, %%mm0 \n\t" | |
667 "por %%mm5, %%mm3 \n\t" | |
668 "psllq $16, %%mm3 \n\t" | |
669 "por %%mm3, %%mm0 \n\t" | |
670 MOVNTQ" %%mm0, %0 \n\t" | |
671 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
672 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
673 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
674 } |
27744 | 675 __asm__ volatile(SFENCE:::"memory"); |
676 __asm__ volatile(EMMS:::"memory"); | |
18861 | 677 #endif |
29481 | 678 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
679 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
680 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
681 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
682 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
683 } |
18861 | 684 } |
685 | |
27486 | 686 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 687 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
688 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
689 const uint8_t *end; |
28276 | 690 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
691 const uint8_t *mm_end; |
18861 | 692 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
693 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
694 end = s + src_size; |
28276 | 695 #if HAVE_MMX |
27744 | 696 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
697 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
698 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
699 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
700 ::"m"(red_16mask),"m"(green_16mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
701 mm_end = end - 15; |
29481 | 702 while (s < mm_end) { |
27744 | 703 __asm__ volatile( |
29480 | 704 PREFETCH" 32%1 \n\t" |
705 "movd %1, %%mm0 \n\t" | |
706 "movd 3%1, %%mm3 \n\t" | |
707 "punpckldq 6%1, %%mm0 \n\t" | |
708 "punpckldq 9%1, %%mm3 \n\t" | |
709 "movq %%mm0, %%mm1 \n\t" | |
710 "movq %%mm0, %%mm2 \n\t" | |
711 "movq %%mm3, %%mm4 \n\t" | |
712 "movq %%mm3, %%mm5 \n\t" | |
713 "psllq $8, %%mm0 \n\t" | |
714 "psllq $8, %%mm3 \n\t" | |
715 "pand %%mm7, %%mm0 \n\t" | |
716 "pand %%mm7, %%mm3 \n\t" | |
717 "psrlq $5, %%mm1 \n\t" | |
718 "psrlq $5, %%mm4 \n\t" | |
719 "pand %%mm6, %%mm1 \n\t" | |
720 "pand %%mm6, %%mm4 \n\t" | |
721 "psrlq $19, %%mm2 \n\t" | |
722 "psrlq $19, %%mm5 \n\t" | |
723 "pand %2, %%mm2 \n\t" | |
724 "pand %2, %%mm5 \n\t" | |
725 "por %%mm1, %%mm0 \n\t" | |
726 "por %%mm4, %%mm3 \n\t" | |
727 "por %%mm2, %%mm0 \n\t" | |
728 "por %%mm5, %%mm3 \n\t" | |
729 "psllq $16, %%mm3 \n\t" | |
730 "por %%mm3, %%mm0 \n\t" | |
731 MOVNTQ" %%mm0, %0 \n\t" | |
732 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
733 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
734 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
735 } |
27744 | 736 __asm__ volatile(SFENCE:::"memory"); |
737 __asm__ volatile(EMMS:::"memory"); | |
18861 | 738 #endif |
29481 | 739 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
740 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
741 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
742 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
743 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
744 } |
18861 | 745 } |
746 | |
27486 | 747 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 748 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
749 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
750 const uint8_t *end; |
28276 | 751 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
752 const uint8_t *mm_end; |
18861 | 753 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
754 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
755 end = s + src_size; |
28276 | 756 #if HAVE_MMX |
27744 | 757 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
758 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
759 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
760 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
761 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
762 mm_end = end - 11; |
29481 | 763 while (s < mm_end) { |
27744 | 764 __asm__ volatile( |
29480 | 765 PREFETCH" 32%1 \n\t" |
766 "movd %1, %%mm0 \n\t" | |
767 "movd 3%1, %%mm3 \n\t" | |
768 "punpckldq 6%1, %%mm0 \n\t" | |
769 "punpckldq 9%1, %%mm3 \n\t" | |
770 "movq %%mm0, %%mm1 \n\t" | |
771 "movq %%mm0, %%mm2 \n\t" | |
772 "movq %%mm3, %%mm4 \n\t" | |
773 "movq %%mm3, %%mm5 \n\t" | |
774 "psrlq $3, %%mm0 \n\t" | |
775 "psrlq $3, %%mm3 \n\t" | |
776 "pand %2, %%mm0 \n\t" | |
777 "pand %2, %%mm3 \n\t" | |
778 "psrlq $6, %%mm1 \n\t" | |
779 "psrlq $6, %%mm4 \n\t" | |
780 "pand %%mm6, %%mm1 \n\t" | |
781 "pand %%mm6, %%mm4 \n\t" | |
782 "psrlq $9, %%mm2 \n\t" | |
783 "psrlq $9, %%mm5 \n\t" | |
784 "pand %%mm7, %%mm2 \n\t" | |
785 "pand %%mm7, %%mm5 \n\t" | |
786 "por %%mm1, %%mm0 \n\t" | |
787 "por %%mm4, %%mm3 \n\t" | |
788 "por %%mm2, %%mm0 \n\t" | |
789 "por %%mm5, %%mm3 \n\t" | |
790 "psllq $16, %%mm3 \n\t" | |
791 "por %%mm3, %%mm0 \n\t" | |
792 MOVNTQ" %%mm0, %0 \n\t" | |
793 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
794 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
795 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
796 } |
27744 | 797 __asm__ volatile(SFENCE:::"memory"); |
798 __asm__ volatile(EMMS:::"memory"); | |
18861 | 799 #endif |
29481 | 800 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
801 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
802 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
803 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
804 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
805 } |
18861 | 806 } |
807 | |
27486 | 808 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 809 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
810 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
811 const uint8_t *end; |
28276 | 812 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
813 const uint8_t *mm_end; |
18861 | 814 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
815 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
816 end = s + src_size; |
28276 | 817 #if HAVE_MMX |
27744 | 818 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
819 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
820 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
821 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
822 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
823 mm_end = end - 15; |
29481 | 824 while (s < mm_end) { |
27744 | 825 __asm__ volatile( |
29480 | 826 PREFETCH" 32%1 \n\t" |
827 "movd %1, %%mm0 \n\t" | |
828 "movd 3%1, %%mm3 \n\t" | |
829 "punpckldq 6%1, %%mm0 \n\t" | |
830 "punpckldq 9%1, %%mm3 \n\t" | |
831 "movq %%mm0, %%mm1 \n\t" | |
832 "movq %%mm0, %%mm2 \n\t" | |
833 "movq %%mm3, %%mm4 \n\t" | |
834 "movq %%mm3, %%mm5 \n\t" | |
835 "psllq $7, %%mm0 \n\t" | |
836 "psllq $7, %%mm3 \n\t" | |
837 "pand %%mm7, %%mm0 \n\t" | |
838 "pand %%mm7, %%mm3 \n\t" | |
839 "psrlq $6, %%mm1 \n\t" | |
840 "psrlq $6, %%mm4 \n\t" | |
841 "pand %%mm6, %%mm1 \n\t" | |
842 "pand %%mm6, %%mm4 \n\t" | |
843 "psrlq $19, %%mm2 \n\t" | |
844 "psrlq $19, %%mm5 \n\t" | |
845 "pand %2, %%mm2 \n\t" | |
846 "pand %2, %%mm5 \n\t" | |
847 "por %%mm1, %%mm0 \n\t" | |
848 "por %%mm4, %%mm3 \n\t" | |
849 "por %%mm2, %%mm0 \n\t" | |
850 "por %%mm5, %%mm3 \n\t" | |
851 "psllq $16, %%mm3 \n\t" | |
852 "por %%mm3, %%mm0 \n\t" | |
853 MOVNTQ" %%mm0, %0 \n\t" | |
854 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
855 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
856 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
857 } |
27744 | 858 __asm__ volatile(SFENCE:::"memory"); |
859 __asm__ volatile(EMMS:::"memory"); | |
18861 | 860 #endif |
29481 | 861 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
862 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
863 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
864 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
865 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
866 } |
18861 | 867 } |
868 | |
/*
  I use a less accurate approximation here by simply left-shifting the input
  value and filling the low order bits with zeroes. This method improves PNG
  compression but this scheme cannot reproduce white exactly, since it does
  not generate an all-ones maximum value; the net effect is to darken the
  image slightly.

  The better method should be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      leftmost bits repeated to fill open bits
       |
   original bits
*/
27486 | 890 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 891 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
892 const uint16_t *end; |
28276 | 893 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
894 const uint16_t *mm_end; |
18861 | 895 #endif |
26909 | 896 uint8_t *d = dst; |
26910 | 897 const uint16_t *s = (const uint16_t*)src; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
898 end = s + src_size/2; |
28276 | 899 #if HAVE_MMX |
27744 | 900 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
901 mm_end = end - 7; |
29481 | 902 while (s < mm_end) { |
27744 | 903 __asm__ volatile( |
29480 | 904 PREFETCH" 32%1 \n\t" |
905 "movq %1, %%mm0 \n\t" | |
906 "movq %1, %%mm1 \n\t" | |
907 "movq %1, %%mm2 \n\t" | |
908 "pand %2, %%mm0 \n\t" | |
909 "pand %3, %%mm1 \n\t" | |
910 "pand %4, %%mm2 \n\t" | |
911 "psllq $3, %%mm0 \n\t" | |
912 "psrlq $2, %%mm1 \n\t" | |
913 "psrlq $7, %%mm2 \n\t" | |
914 "movq %%mm0, %%mm3 \n\t" | |
915 "movq %%mm1, %%mm4 \n\t" | |
916 "movq %%mm2, %%mm5 \n\t" | |
917 "punpcklwd %5, %%mm0 \n\t" | |
918 "punpcklwd %5, %%mm1 \n\t" | |
919 "punpcklwd %5, %%mm2 \n\t" | |
920 "punpckhwd %5, %%mm3 \n\t" | |
921 "punpckhwd %5, %%mm4 \n\t" | |
922 "punpckhwd %5, %%mm5 \n\t" | |
923 "psllq $8, %%mm1 \n\t" | |
924 "psllq $16, %%mm2 \n\t" | |
925 "por %%mm1, %%mm0 \n\t" | |
926 "por %%mm2, %%mm0 \n\t" | |
927 "psllq $8, %%mm4 \n\t" | |
928 "psllq $16, %%mm5 \n\t" | |
929 "por %%mm4, %%mm3 \n\t" | |
930 "por %%mm5, %%mm3 \n\t" | |
18861 | 931 |
29480 | 932 "movq %%mm0, %%mm6 \n\t" |
933 "movq %%mm3, %%mm7 \n\t" | |
23129 | 934 |
29480 | 935 "movq 8%1, %%mm0 \n\t" |
936 "movq 8%1, %%mm1 \n\t" | |
937 "movq 8%1, %%mm2 \n\t" | |
938 "pand %2, %%mm0 \n\t" | |
939 "pand %3, %%mm1 \n\t" | |
940 "pand %4, %%mm2 \n\t" | |
941 "psllq $3, %%mm0 \n\t" | |
942 "psrlq $2, %%mm1 \n\t" | |
943 "psrlq $7, %%mm2 \n\t" | |
944 "movq %%mm0, %%mm3 \n\t" | |
945 "movq %%mm1, %%mm4 \n\t" | |
946 "movq %%mm2, %%mm5 \n\t" | |
947 "punpcklwd %5, %%mm0 \n\t" | |
948 "punpcklwd %5, %%mm1 \n\t" | |
949 "punpcklwd %5, %%mm2 \n\t" | |
950 "punpckhwd %5, %%mm3 \n\t" | |
951 "punpckhwd %5, %%mm4 \n\t" | |
952 "punpckhwd %5, %%mm5 \n\t" | |
953 "psllq $8, %%mm1 \n\t" | |
954 "psllq $16, %%mm2 \n\t" | |
955 "por %%mm1, %%mm0 \n\t" | |
956 "por %%mm2, %%mm0 \n\t" | |
957 "psllq $8, %%mm4 \n\t" | |
958 "psllq $16, %%mm5 \n\t" | |
959 "por %%mm4, %%mm3 \n\t" | |
960 "por %%mm5, %%mm3 \n\t" | |
18861 | 961 |
29480 | 962 :"=m"(*d) |
963 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |
964 :"memory"); | |
27158 | 965 /* borrowed 32 to 24 */ |
27744 | 966 __asm__ volatile( |
29480 | 967 "movq %%mm0, %%mm4 \n\t" |
968 "movq %%mm3, %%mm5 \n\t" | |
969 "movq %%mm6, %%mm0 \n\t" | |
970 "movq %%mm7, %%mm1 \n\t" | |
23129 | 971 |
29480 | 972 "movq %%mm4, %%mm6 \n\t" |
973 "movq %%mm5, %%mm7 \n\t" | |
974 "movq %%mm0, %%mm2 \n\t" | |
975 "movq %%mm1, %%mm3 \n\t" | |
18861 | 976 |
29480 | 977 "psrlq $8, %%mm2 \n\t" |
978 "psrlq $8, %%mm3 \n\t" | |
979 "psrlq $8, %%mm6 \n\t" | |
980 "psrlq $8, %%mm7 \n\t" | |
981 "pand %2, %%mm0 \n\t" | |
982 "pand %2, %%mm1 \n\t" | |
983 "pand %2, %%mm4 \n\t" | |
984 "pand %2, %%mm5 \n\t" | |
985 "pand %3, %%mm2 \n\t" | |
986 "pand %3, %%mm3 \n\t" | |
987 "pand %3, %%mm6 \n\t" | |
988 "pand %3, %%mm7 \n\t" | |
989 "por %%mm2, %%mm0 \n\t" | |
990 "por %%mm3, %%mm1 \n\t" | |
991 "por %%mm6, %%mm4 \n\t" | |
992 "por %%mm7, %%mm5 \n\t" | |
18861 | 993 |
29480 | 994 "movq %%mm1, %%mm2 \n\t" |
995 "movq %%mm4, %%mm3 \n\t" | |
996 "psllq $48, %%mm2 \n\t" | |
997 "psllq $32, %%mm3 \n\t" | |
998 "pand %4, %%mm2 \n\t" | |
999 "pand %5, %%mm3 \n\t" | |
1000 "por %%mm2, %%mm0 \n\t" | |
1001 "psrlq $16, %%mm1 \n\t" | |
1002 "psrlq $32, %%mm4 \n\t" | |
1003 "psllq $16, %%mm5 \n\t" | |
1004 "por %%mm3, %%mm1 \n\t" | |
1005 "pand %6, %%mm5 \n\t" | |
1006 "por %%mm5, %%mm4 \n\t" | |
18861 | 1007 |
29480 | 1008 MOVNTQ" %%mm0, %0 \n\t" |
1009 MOVNTQ" %%mm1, 8%0 \n\t" | |
1010 MOVNTQ" %%mm4, 16%0" | |
18861 | 1011 |
29480 | 1012 :"=m"(*d) |
1013 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1014 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1015 d += 24; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1016 s += 8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1017 } |
27744 | 1018 __asm__ volatile(SFENCE:::"memory"); |
1019 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1020 #endif |
29481 | 1021 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1022 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1023 bgr = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1024 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1025 *d++ = (bgr&0x3E0)>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1026 *d++ = (bgr&0x7C00)>>7; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1027 } |
18861 | 1028 } |
1029 | |
27486 | 1030 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 1031 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1032 const uint16_t *end; |
28276 | 1033 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1034 const uint16_t *mm_end; |
18861 | 1035 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1036 uint8_t *d = (uint8_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1037 const uint16_t *s = (const uint16_t *)src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1038 end = s + src_size/2; |
28276 | 1039 #if HAVE_MMX |
27744 | 1040 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1041 mm_end = end - 7; |
29481 | 1042 while (s < mm_end) { |
27744 | 1043 __asm__ volatile( |
29480 | 1044 PREFETCH" 32%1 \n\t" |
1045 "movq %1, %%mm0 \n\t" | |
1046 "movq %1, %%mm1 \n\t" | |
1047 "movq %1, %%mm2 \n\t" | |
1048 "pand %2, %%mm0 \n\t" | |
1049 "pand %3, %%mm1 \n\t" | |
1050 "pand %4, %%mm2 \n\t" | |
1051 "psllq $3, %%mm0 \n\t" | |
1052 "psrlq $3, %%mm1 \n\t" | |
1053 "psrlq $8, %%mm2 \n\t" | |
1054 "movq %%mm0, %%mm3 \n\t" | |
1055 "movq %%mm1, %%mm4 \n\t" | |
1056 "movq %%mm2, %%mm5 \n\t" | |
1057 "punpcklwd %5, %%mm0 \n\t" | |
1058 "punpcklwd %5, %%mm1 \n\t" | |
1059 "punpcklwd %5, %%mm2 \n\t" | |
1060 "punpckhwd %5, %%mm3 \n\t" | |
1061 "punpckhwd %5, %%mm4 \n\t" | |
1062 "punpckhwd %5, %%mm5 \n\t" | |
1063 "psllq $8, %%mm1 \n\t" | |
1064 "psllq $16, %%mm2 \n\t" | |
1065 "por %%mm1, %%mm0 \n\t" | |
1066 "por %%mm2, %%mm0 \n\t" | |
1067 "psllq $8, %%mm4 \n\t" | |
1068 "psllq $16, %%mm5 \n\t" | |
1069 "por %%mm4, %%mm3 \n\t" | |
1070 "por %%mm5, %%mm3 \n\t" | |
23129 | 1071 |
29480 | 1072 "movq %%mm0, %%mm6 \n\t" |
1073 "movq %%mm3, %%mm7 \n\t" | |
18861 | 1074 |
29480 | 1075 "movq 8%1, %%mm0 \n\t" |
1076 "movq 8%1, %%mm1 \n\t" | |
1077 "movq 8%1, %%mm2 \n\t" | |
1078 "pand %2, %%mm0 \n\t" | |
1079 "pand %3, %%mm1 \n\t" | |
1080 "pand %4, %%mm2 \n\t" | |
1081 "psllq $3, %%mm0 \n\t" | |
1082 "psrlq $3, %%mm1 \n\t" | |
1083 "psrlq $8, %%mm2 \n\t" | |
1084 "movq %%mm0, %%mm3 \n\t" | |
1085 "movq %%mm1, %%mm4 \n\t" | |
1086 "movq %%mm2, %%mm5 \n\t" | |
1087 "punpcklwd %5, %%mm0 \n\t" | |
1088 "punpcklwd %5, %%mm1 \n\t" | |
1089 "punpcklwd %5, %%mm2 \n\t" | |
1090 "punpckhwd %5, %%mm3 \n\t" | |
1091 "punpckhwd %5, %%mm4 \n\t" | |
1092 "punpckhwd %5, %%mm5 \n\t" | |
1093 "psllq $8, %%mm1 \n\t" | |
1094 "psllq $16, %%mm2 \n\t" | |
1095 "por %%mm1, %%mm0 \n\t" | |
1096 "por %%mm2, %%mm0 \n\t" | |
1097 "psllq $8, %%mm4 \n\t" | |
1098 "psllq $16, %%mm5 \n\t" | |
1099 "por %%mm4, %%mm3 \n\t" | |
1100 "por %%mm5, %%mm3 \n\t" | |
1101 :"=m"(*d) | |
1102 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |
1103 :"memory"); | |
27158 | 1104 /* borrowed 32 to 24 */ |
27744 | 1105 __asm__ volatile( |
29480 | 1106 "movq %%mm0, %%mm4 \n\t" |
1107 "movq %%mm3, %%mm5 \n\t" | |
1108 "movq %%mm6, %%mm0 \n\t" | |
1109 "movq %%mm7, %%mm1 \n\t" | |
23129 | 1110 |
29480 | 1111 "movq %%mm4, %%mm6 \n\t" |
1112 "movq %%mm5, %%mm7 \n\t" | |
1113 "movq %%mm0, %%mm2 \n\t" | |
1114 "movq %%mm1, %%mm3 \n\t" | |
18861 | 1115 |
29480 | 1116 "psrlq $8, %%mm2 \n\t" |
1117 "psrlq $8, %%mm3 \n\t" | |
1118 "psrlq $8, %%mm6 \n\t" | |
1119 "psrlq $8, %%mm7 \n\t" | |
1120 "pand %2, %%mm0 \n\t" | |
1121 "pand %2, %%mm1 \n\t" | |
1122 "pand %2, %%mm4 \n\t" | |
1123 "pand %2, %%mm5 \n\t" | |
1124 "pand %3, %%mm2 \n\t" | |
1125 "pand %3, %%mm3 \n\t" | |
1126 "pand %3, %%mm6 \n\t" | |
1127 "pand %3, %%mm7 \n\t" | |
1128 "por %%mm2, %%mm0 \n\t" | |
1129 "por %%mm3, %%mm1 \n\t" | |
1130 "por %%mm6, %%mm4 \n\t" | |
1131 "por %%mm7, %%mm5 \n\t" | |
18861 | 1132 |
29480 | 1133 "movq %%mm1, %%mm2 \n\t" |
1134 "movq %%mm4, %%mm3 \n\t" | |
1135 "psllq $48, %%mm2 \n\t" | |
1136 "psllq $32, %%mm3 \n\t" | |
1137 "pand %4, %%mm2 \n\t" | |
1138 "pand %5, %%mm3 \n\t" | |
1139 "por %%mm2, %%mm0 \n\t" | |
1140 "psrlq $16, %%mm1 \n\t" | |
1141 "psrlq $32, %%mm4 \n\t" | |
1142 "psllq $16, %%mm5 \n\t" | |
1143 "por %%mm3, %%mm1 \n\t" | |
1144 "pand %6, %%mm5 \n\t" | |
1145 "por %%mm5, %%mm4 \n\t" | |
18861 | 1146 |
29480 | 1147 MOVNTQ" %%mm0, %0 \n\t" |
1148 MOVNTQ" %%mm1, 8%0 \n\t" | |
1149 MOVNTQ" %%mm4, 16%0" | |
18861 | 1150 |
29480 | 1151 :"=m"(*d) |
1152 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1153 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1154 d += 24; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1155 s += 8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1156 } |
27744 | 1157 __asm__ volatile(SFENCE:::"memory"); |
1158 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1159 #endif |
29481 | 1160 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1161 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1162 bgr = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1163 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1164 *d++ = (bgr&0x7E0)>>3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1165 *d++ = (bgr&0xF800)>>8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1166 } |
18861 | 1167 } |
1168 | |
28773
8a0785c19f48
Rewrite of rgb15to32 and rgb16to32 using fewer asm instructions
sdrik
parents:
28721
diff
changeset
|
/*
 * PACK_RGB32: asm fragment that interleaves four pixels' separate 8-bit
 * components into four 32-bit pixels (fourth byte of each pixel = FF) and
 * stores the resulting 16 bytes at operand %0 using MOVNTQ.
 *
 * Register contents expected on entry:
 * mm0 = 00 B3 00 B2 00 B1 00 B0
 * mm1 = 00 G3 00 G2 00 G1 00 G0
 * mm2 = 00 R3 00 R2 00 R1 00 R0
 * mm6 = FF FF FF FF FF FF FF FF
 * mm7 = 00 00 00 00 00 00 00 00
 *
 * mm0, mm1, mm2 and mm3 are overwritten; mm6 and mm7 are only read.
 * The definition deliberately ends without a trailing line continuation so
 * that it does not depend on the following source line being blank.
 */
#define PACK_RGB32 \
    "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
    "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
    "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
    "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
    "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
    "movq %%mm0, %%mm3 \n\t" \
    "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
    "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
    MOVNTQ" %%mm0, %0 \n\t" \
    MOVNTQ" %%mm3, 8%0 \n\t"

18861 | 1188 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) |
1189 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1190 const uint16_t *end; |
28276 | 1191 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1192 const uint16_t *mm_end; |
18861 | 1193 #endif |
26909 | 1194 uint8_t *d = dst; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1195 const uint16_t *s = (const uint16_t *)src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1196 end = s + src_size/2; |
28276 | 1197 #if HAVE_MMX |
27744 | 1198 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1199 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); | |
28773
8a0785c19f48
Rewrite of rgb15to32 and rgb16to32 using fewer asm instructions
sdrik
parents:
28721
diff
changeset
|
1200 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1201 mm_end = end - 3; |
29481 | 1202 while (s < mm_end) { |
27744 | 1203 __asm__ volatile( |
29480 | 1204 PREFETCH" 32%1 \n\t" |
1205 "movq %1, %%mm0 \n\t" | |
1206 "movq %1, %%mm1 \n\t" | |
1207 "movq %1, %%mm2 \n\t" | |
1208 "pand %2, %%mm0 \n\t" | |
1209 "pand %3, %%mm1 \n\t" | |
1210 "pand %4, %%mm2 \n\t" | |
1211 "psllq $3, %%mm0 \n\t" | |
1212 "psrlq $2, %%mm1 \n\t" | |
1213 "psrlq $7, %%mm2 \n\t" | |
1214 PACK_RGB32 | |
1215 :"=m"(*d) | |
1216 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |
1217 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1218 d += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1219 s += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1220 } |
27744 | 1221 __asm__ volatile(SFENCE:::"memory"); |
1222 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1223 #endif |
29481 | 1224 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1225 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1226 bgr = *s++; |
29397 | 1227 #if HAVE_BIGENDIAN |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1228 *d++ = 255; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1229 *d++ = (bgr&0x7C00)>>7; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1230 *d++ = (bgr&0x3E0)>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1231 *d++ = (bgr&0x1F)<<3; |
18861 | 1232 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1233 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1234 *d++ = (bgr&0x3E0)>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1235 *d++ = (bgr&0x7C00)>>7; |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1236 *d++ = 255; |
18861 | 1237 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1238 } |
18861 | 1239 } |
1240 | |
1241 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1242 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1243 const uint16_t *end; |
28276 | 1244 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1245 const uint16_t *mm_end; |
18861 | 1246 #endif |
26909 | 1247 uint8_t *d = dst; |
26910 | 1248 const uint16_t *s = (const uint16_t*)src; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1249 end = s + src_size/2; |
28276 | 1250 #if HAVE_MMX |
27744 | 1251 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1252 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); | |
28773
8a0785c19f48
Rewrite of rgb15to32 and rgb16to32 using fewer asm instructions
sdrik
parents:
28721
diff
changeset
|
1253 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1254 mm_end = end - 3; |
29481 | 1255 while (s < mm_end) { |
27744 | 1256 __asm__ volatile( |
29480 | 1257 PREFETCH" 32%1 \n\t" |
1258 "movq %1, %%mm0 \n\t" | |
1259 "movq %1, %%mm1 \n\t" | |
1260 "movq %1, %%mm2 \n\t" | |
1261 "pand %2, %%mm0 \n\t" | |
1262 "pand %3, %%mm1 \n\t" | |
1263 "pand %4, %%mm2 \n\t" | |
1264 "psllq $3, %%mm0 \n\t" | |
1265 "psrlq $3, %%mm1 \n\t" | |
1266 "psrlq $8, %%mm2 \n\t" | |
1267 PACK_RGB32 | |
1268 :"=m"(*d) | |
1269 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |
1270 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1271 d += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1272 s += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1273 } |
27744 | 1274 __asm__ volatile(SFENCE:::"memory"); |
1275 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1276 #endif |
29481 | 1277 while (s < end) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1278 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1279 bgr = *s++; |
29397 | 1280 #if HAVE_BIGENDIAN |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1281 *d++ = 255; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1282 *d++ = (bgr&0xF800)>>8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1283 *d++ = (bgr&0x7E0)>>3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1284 *d++ = (bgr&0x1F)<<3; |
18861 | 1285 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1286 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1287 *d++ = (bgr&0x7E0)>>3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1288 *d++ = (bgr&0xF800)>>8; |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1289 *d++ = 255; |
18861 | 1290 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1291 } |
18861 | 1292 } |
1293 | |
1294 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1295 { | |
28968 | 1296 x86_reg idx = 15 - src_size; |
26910 | 1297 const uint8_t *s = src-idx; |
1298 uint8_t *d = dst-idx; | |
28276 | 1299 #if HAVE_MMX |
27744 | 1300 __asm__ volatile( |
29480 | 1301 "test %0, %0 \n\t" |
1302 "jns 2f \n\t" | |
1303 PREFETCH" (%1, %0) \n\t" | |
1304 "movq %3, %%mm7 \n\t" | |
1305 "pxor %4, %%mm7 \n\t" | |
1306 "movq %%mm7, %%mm6 \n\t" | |
1307 "pxor %5, %%mm7 \n\t" | |
1308 ASMALIGN(4) | |
1309 "1: \n\t" | |
1310 PREFETCH" 32(%1, %0) \n\t" | |
1311 "movq (%1, %0), %%mm0 \n\t" | |
1312 "movq 8(%1, %0), %%mm1 \n\t" | |
28276 | 1313 # if HAVE_MMX2 |
29480 | 1314 "pshufw $177, %%mm0, %%mm3 \n\t" |
1315 "pshufw $177, %%mm1, %%mm5 \n\t" | |
1316 "pand %%mm7, %%mm0 \n\t" | |
1317 "pand %%mm6, %%mm3 \n\t" | |
1318 "pand %%mm7, %%mm1 \n\t" | |
1319 "pand %%mm6, %%mm5 \n\t" | |
1320 "por %%mm3, %%mm0 \n\t" | |
1321 "por %%mm5, %%mm1 \n\t" | |
22991 | 1322 # else |
29480 | 1323 "movq %%mm0, %%mm2 \n\t" |
1324 "movq %%mm1, %%mm4 \n\t" | |
1325 "pand %%mm7, %%mm0 \n\t" | |
1326 "pand %%mm6, %%mm2 \n\t" | |
1327 "pand %%mm7, %%mm1 \n\t" | |
1328 "pand %%mm6, %%mm4 \n\t" | |
1329 "movq %%mm2, %%mm3 \n\t" | |
1330 "movq %%mm4, %%mm5 \n\t" | |
1331 "pslld $16, %%mm2 \n\t" | |
1332 "psrld $16, %%mm3 \n\t" | |
1333 "pslld $16, %%mm4 \n\t" | |
1334 "psrld $16, %%mm5 \n\t" | |
1335 "por %%mm2, %%mm0 \n\t" | |
1336 "por %%mm4, %%mm1 \n\t" | |
1337 "por %%mm3, %%mm0 \n\t" | |
1338 "por %%mm5, %%mm1 \n\t" | |
22991 | 1339 # endif |
29480 | 1340 MOVNTQ" %%mm0, (%2, %0) \n\t" |
1341 MOVNTQ" %%mm1, 8(%2, %0) \n\t" | |
1342 "add $16, %0 \n\t" | |
1343 "js 1b \n\t" | |
1344 SFENCE" \n\t" | |
1345 EMMS" \n\t" | |
1346 "2: \n\t" | |
1347 : "+&r"(idx) | |
1348 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) | |
1349 : "memory"); | |
18861 | 1350 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1351 for (; idx<15; idx+=4) { |
26910 | 1352 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1353 v &= 0xff00ff; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1354 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1355 } |
18861 | 1356 } |
1357 | |
1358 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) | |
1359 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1360 unsigned i; |
28276 | 1361 #if HAVE_MMX |
28957 | 1362 x86_reg mmx_size= 23 - src_size; |
27744 | 1363 __asm__ volatile ( |
29480 | 1364 "test %%"REG_a", %%"REG_a" \n\t" |
1365 "jns 2f \n\t" | |
1366 "movq "MANGLE(mask24r)", %%mm5 \n\t" | |
1367 "movq "MANGLE(mask24g)", %%mm6 \n\t" | |
1368 "movq "MANGLE(mask24b)", %%mm7 \n\t" | |
1369 ASMALIGN(4) | |
1370 "1: \n\t" | |
1371 PREFETCH" 32(%1, %%"REG_a") \n\t" | |
1372 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |
1373 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG | |
1374 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B | |
1375 "psllq $16, %%mm0 \n\t" // 00 BGR BGR | |
1376 "pand %%mm5, %%mm0 \n\t" | |
1377 "pand %%mm6, %%mm1 \n\t" | |
1378 "pand %%mm7, %%mm2 \n\t" | |
1379 "por %%mm0, %%mm1 \n\t" | |
1380 "por %%mm2, %%mm1 \n\t" | |
1381 "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |
1382 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG | |
1383 "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B | |
1384 "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR | |
1385 "pand %%mm7, %%mm0 \n\t" | |
1386 "pand %%mm5, %%mm1 \n\t" | |
1387 "pand %%mm6, %%mm2 \n\t" | |
1388 "por %%mm0, %%mm1 \n\t" | |
1389 "por %%mm2, %%mm1 \n\t" | |
1390 "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B | |
1391 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R | |
1392 "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR | |
1393 "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG | |
1394 "pand %%mm6, %%mm0 \n\t" | |
1395 "pand %%mm7, %%mm1 \n\t" | |
1396 "pand %%mm5, %%mm2 \n\t" | |
1397 "por %%mm0, %%mm1 \n\t" | |
1398 "por %%mm2, %%mm1 \n\t" | |
1399 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" | |
1400 "add $24, %%"REG_a" \n\t" | |
1401 " js 1b \n\t" | |
1402 "2: \n\t" | |
1403 : "+a" (mmx_size) | |
1404 : "r" (src-mmx_size), "r"(dst-mmx_size) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1405 ); |
18861 | 1406 |
27744 | 1407 __asm__ volatile(SFENCE:::"memory"); |
1408 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1409 |
27158 | 1410 if (mmx_size==23) return; //finished, was multiple of 8 |
18861 | 1411 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1412 src+= src_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1413 dst+= src_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1414 src_size= 23-mmx_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1415 src-= src_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1416 dst-= src_size; |
18861 | 1417 #endif |
29481 | 1418 for (i=0; i<src_size; i+=3) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1419 register uint8_t x; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1420 x = src[i + 2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1421 dst[i + 1] = src[i + 1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1422 dst[i + 2] = src[i + 0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1423 dst[i + 0] = x; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1424 } |
18861 | 1425 } |
1426 | |
1427 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1428 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1429 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
18861 | 1430 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1431 long y; |
28968 | 1432 const x86_reg chromWidth= width>>1; |
29481 | 1433 for (y=0; y<height; y++) { |
28276 | 1434 #if HAVE_MMX |
29612 | 1435 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
27744 | 1436 __asm__ volatile( |
29480 | 1437 "xor %%"REG_a", %%"REG_a" \n\t" |
1438 ASMALIGN(4) | |
1439 "1: \n\t" | |
1440 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |
1441 PREFETCH" 32(%2, %%"REG_a") \n\t" | |
1442 PREFETCH" 32(%3, %%"REG_a") \n\t" | |
1443 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | |
1444 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1445 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | |
1446 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1447 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
18861 | 1448 |
29480 | 1449 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
1450 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | |
1451 "movq %%mm3, %%mm4 \n\t" // Y(0) | |
1452 "movq %%mm5, %%mm6 \n\t" // Y(8) | |
1453 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | |
1454 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | |
1455 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | |
1456 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | |
18861 | 1457 |
29480 | 1458 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" |
1459 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" | |
1460 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" | |
1461 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" | |
18861 | 1462 |
29480 | 1463 "add $8, %%"REG_a" \n\t" |
1464 "cmp %4, %%"REG_a" \n\t" | |
1465 " jb 1b \n\t" | |
1466 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | |
1467 : "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1468 ); |
18861 | 1469 #else |
1470 | |
28276 | 1471 #if ARCH_ALPHA && HAVE_MVI |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1472 #define pl2yuy2(n) \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1473 y1 = yc[n]; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1474 y2 = yc2[n]; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1475 u = uc[n]; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1476 v = vc[n]; \ |
27744 | 1477 __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ |
1478 __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ | |
1479 __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ | |
1480 __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1481 yuv1 = (u << 8) + (v << 24); \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1482 yuv2 = yuv1 + y2; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1483 yuv1 += y1; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1484 qdst[n] = yuv1; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1485 qdst2[n] = yuv2; |
18861 | 1486 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1487 int i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1488 uint64_t *qdst = (uint64_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1489 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1490 const uint32_t *yc = (uint32_t *) ysrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1491 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1492 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; |
29481 | 1493 for (i = 0; i < chromWidth; i += 8) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1494 uint64_t y1, y2, yuv1, yuv2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1495 uint64_t u, v; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1496 /* Prefetch */ |
27744 | 1497 __asm__("ldq $31,64(%0)" :: "r"(yc)); |
1498 __asm__("ldq $31,64(%0)" :: "r"(yc2)); | |
1499 __asm__("ldq $31,64(%0)" :: "r"(uc)); | |
1500 __asm__("ldq $31,64(%0)" :: "r"(vc)); | |
18861 | 1501 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1502 pl2yuy2(0); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1503 pl2yuy2(1); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1504 pl2yuy2(2); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1505 pl2yuy2(3); |
18861 | 1506 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1507 yc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1508 yc2 += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1509 uc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1510 vc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1511 qdst += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1512 qdst2 += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1513 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1514 y++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1515 ysrc += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1516 dst += dstStride; |
18861 | 1517 |
27688
49d5420c5698
Use HAVE_FAST_64BIT instead of nonstandard __WORDSIZE macro.
diego
parents:
27666
diff
changeset
|
1518 #elif HAVE_FAST_64BIT |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1519 int i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1520 uint64_t *ldst = (uint64_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1521 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
29481 | 1522 for (i = 0; i < chromWidth; i += 2) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1523 uint64_t k, l; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1524 k = yc[0] + (uc[0] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1525 (yc[1] << 16) + (vc[0] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1526 l = yc[2] + (uc[1] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1527 (yc[3] << 16) + (vc[1] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1528 *ldst++ = k + (l << 32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1529 yc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1530 uc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1531 vc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1532 } |
18861 | 1533 |
1534 #else | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1535 int i, *idst = (int32_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1536 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
29481 | 1537 for (i = 0; i < chromWidth; i++) { |
29397 | 1538 #if HAVE_BIGENDIAN |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1539 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1540 (yc[1] << 8) + (vc[0] << 0); |
18861 | 1541 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1542 *idst++ = yc[0] + (uc[0] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1543 (yc[1] << 16) + (vc[0] << 24); |
18861 | 1544 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1545 yc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1546 uc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1547 vc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1548 } |
18861 | 1549 #endif |
1550 #endif | |
29481 | 1551 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1552 usrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1553 vsrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1554 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1555 ysrc += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1556 dst += dstStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1557 } |
28276 | 1558 #if HAVE_MMX |
29480 | 1559 __asm__(EMMS" \n\t" |
1560 SFENCE" \n\t" | |
1561 :::"memory"); | |
18861 | 1562 #endif |
1563 } | |
1564 | |
1565 /** | |
27158 | 1566 * Height should be a multiple of 2 and width should be a multiple of 16. |
1567 * (If this is a problem for anyone then tell me, and I will fix it.) | |
18861 | 1568 */ |
1569 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1570 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1571 long lumStride, long chromStride, long dstStride) |
18861 | 1572 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1573 //FIXME interpolate chroma |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1574 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
18861 | 1575 } |
1576 | |
1577 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1578 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1579 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
18861 | 1580 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1581 long y; |
28968 | 1582 const x86_reg chromWidth= width>>1; |
29481 | 1583 for (y=0; y<height; y++) { |
28276 | 1584 #if HAVE_MMX |
29612 | 1585 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
27744 | 1586 __asm__ volatile( |
29480 | 1587 "xor %%"REG_a", %%"REG_a" \n\t" |
1588 ASMALIGN(4) | |
1589 "1: \n\t" | |
1590 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |
1591 PREFETCH" 32(%2, %%"REG_a") \n\t" | |
1592 PREFETCH" 32(%3, %%"REG_a") \n\t" | |
1593 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | |
1594 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1595 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | |
1596 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1597 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
18861 | 1598 |
29480 | 1599 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
1600 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | |
1601 "movq %%mm0, %%mm4 \n\t" // Y(0) | |
1602 "movq %%mm2, %%mm6 \n\t" // Y(8) | |
1603 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) | |
1604 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) | |
1605 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) | |
1606 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) | |
18861 | 1607 |
29480 | 1608 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" |
1609 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" | |
1610 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" | |
1611 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" | |
18861 | 1612 |
29480 | 1613 "add $8, %%"REG_a" \n\t" |
1614 "cmp %4, %%"REG_a" \n\t" | |
1615 " jb 1b \n\t" | |
1616 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | |
1617 : "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1618 ); |
18861 | 1619 #else |
25109 | 1620 //FIXME adapt the Alpha ASM code from yv12->yuy2 |
18861 | 1621 |
27688
49d5420c5698
Use HAVE_FAST_64BIT instead of nonstandard __WORDSIZE macro.
diego
parents:
27666
diff
changeset
|
1622 #if HAVE_FAST_64BIT |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1623 int i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1624 uint64_t *ldst = (uint64_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1625 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
29481 | 1626 for (i = 0; i < chromWidth; i += 2) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1627 uint64_t k, l; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1628 k = uc[0] + (yc[0] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1629 (vc[0] << 16) + (yc[1] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1630 l = uc[1] + (yc[2] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1631 (vc[1] << 16) + (yc[3] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1632 *ldst++ = k + (l << 32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1633 yc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1634 uc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1635 vc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1636 } |
18861 | 1637 |
1638 #else | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1639 int i, *idst = (int32_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1640 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
29481 | 1641 for (i = 0; i < chromWidth; i++) { |
29397 | 1642 #if HAVE_BIGENDIAN |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1643 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1644 (vc[0] << 8) + (yc[1] << 0); |
18861 | 1645 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1646 *idst++ = uc[0] + (yc[0] << 8) + |
27158 | 1647 (vc[0] << 16) + (yc[1] << 24); |
18861 | 1648 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1649 yc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1650 uc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1651 vc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1652 } |
18861 | 1653 #endif |
1654 #endif | |
29481 | 1655 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1656 usrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1657 vsrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1658 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1659 ysrc += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1660 dst += dstStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1661 } |
28276 | 1662 #if HAVE_MMX |
29480 | 1663 __asm__(EMMS" \n\t" |
1664 SFENCE" \n\t" | |
1665 :::"memory"); | |
18861 | 1666 #endif |
1667 } | |
1668 | |
1669 /** | |
27158 | 1670 * Height should be a multiple of 2 and width should be a multiple of 16 |
1671 * (If this is a problem for anyone then tell me, and I will fix it.) | |
18861 | 1672 */ |
1673 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1674 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1675 long lumStride, long chromStride, long dstStride) |
18861 | 1676 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1677 //FIXME interpolate chroma |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1678 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
18861 | 1679 } |
1680 | |
1681 /** | |
25109 | 1682 * Width should be a multiple of 16. |
18861 | 1683 */ |
27495 | 1684 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1685 long width, long height, | |
1686 long lumStride, long chromStride, long dstStride) | |
1687 { | |
1688 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); | |
1689 } | |
1690 | |
1691 /** | |
1692 * Width should be a multiple of 16. | |
1693 */ | |
18861 | 1694 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1695 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1696 long lumStride, long chromStride, long dstStride) |
18861 | 1697 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1698 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); |
18861 | 1699 } |
1700 | |
1701 /** | |
27158 | 1702 * Height should be a multiple of 2 and width should be a multiple of 16. |
1703 * (If this is a problem for anyone then tell me, and I will fix it.) | |
18861 | 1704 */ |
1705 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1706 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1707 long lumStride, long chromStride, long srcStride) |
18861 | 1708 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1709 long y; |
28968 | 1710 const x86_reg chromWidth= width>>1; |
29481 | 1711 for (y=0; y<height; y+=2) { |
28276 | 1712 #if HAVE_MMX |
27744 | 1713 __asm__ volatile( |
29480 | 1714 "xor %%"REG_a", %%"REG_a" \n\t" |
1715 "pcmpeqw %%mm7, %%mm7 \n\t" | |
1716 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
1717 ASMALIGN(4) | |
1718 "1: \n\t" | |
1719 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
1720 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1721 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1722 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) | |
1723 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) | |
1724 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) | |
1725 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) | |
1726 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
1727 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
1728 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1729 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
18861 | 1730 |
29480 | 1731 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" |
18861 | 1732 |
29480 | 1733 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) |
1734 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) | |
1735 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) | |
1736 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) | |
1737 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) | |
1738 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) | |
1739 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
1740 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
1741 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
1742 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
18861 | 1743 |
29480 | 1744 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" |
18861 | 1745 |
29480 | 1746 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
1747 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
1748 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
1749 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
1750 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
1751 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
1752 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
1753 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
18861 | 1754 |
29480 | 1755 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" |
1756 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" | |
18861 | 1757 |
29480 | 1758 "add $8, %%"REG_a" \n\t" |
1759 "cmp %4, %%"REG_a" \n\t" | |
1760 " jb 1b \n\t" | |
1761 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
1762 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1763 ); |
18861 | 1764 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1765 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1766 src += srcStride; |
18861 | 1767 |
27744 | 1768 __asm__ volatile( |
29480 | 1769 "xor %%"REG_a", %%"REG_a" \n\t" |
1770 ASMALIGN(4) | |
1771 "1: \n\t" | |
1772 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
1773 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1774 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1775 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) | |
1776 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) | |
1777 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
1778 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
1779 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
1780 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
1781 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
1782 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
18861 | 1783 |
29480 | 1784 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" |
1785 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" | |
18861 | 1786 |
29480 | 1787 "add $8, %%"REG_a" \n\t" |
1788 "cmp %4, %%"REG_a" \n\t" | |
1789 " jb 1b \n\t" | |
18861 | 1790 |
29480 | 1791 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
1792 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1793 ); |
18861 | 1794 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1795 long i; |
29481 | 1796 for (i=0; i<chromWidth; i++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1797 ydst[2*i+0] = src[4*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1798 udst[i] = src[4*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1799 ydst[2*i+1] = src[4*i+2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1800 vdst[i] = src[4*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1801 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1802 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1803 src += srcStride; |
18861 | 1804 |
29481 | 1805 for (i=0; i<chromWidth; i++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1806 ydst[2*i+0] = src[4*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1807 ydst[2*i+1] = src[4*i+2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1808 } |
18861 | 1809 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1810 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1811 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1812 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1813 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1814 } |
28276 | 1815 #if HAVE_MMX |
29480 | 1816 __asm__ volatile(EMMS" \n\t" |
1817 SFENCE" \n\t" | |
1818 :::"memory"); | |
18861 | 1819 #endif |
1820 } | |
1821 | |
1822 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1823 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1824 long width, long height, long lumStride, long chromStride) |
18861 | 1825 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1826 /* Y Plane */ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1827 memcpy(ydst, ysrc, width*height); |
18861 | 1828 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1829 /* XXX: implement upscaling for U,V */ |
18861 | 1830 } |
1831 | |
1832 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) | |
1833 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1834 long x,y; |
23129 | 1835 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1836 dst[0]= src[0]; |
23129 | 1837 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1838 // first line |
29481 | 1839 for (x=0; x<srcWidth-1; x++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1840 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1841 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1842 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1843 dst[2*srcWidth-1]= src[srcWidth-1]; |
23129 | 1844 |
29480 | 1845 dst+= dstStride; |
18861 | 1846 |
29481 | 1847 for (y=1; y<srcHeight; y++) { |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
1848 #if HAVE_MMX2 || HAVE_AMD3DNOW |
28957 | 1849 const x86_reg mmxSize= srcWidth&~15; |
27744 | 1850 __asm__ volatile( |
29480 | 1851 "mov %4, %%"REG_a" \n\t" |
1852 "1: \n\t" | |
1853 "movq (%0, %%"REG_a"), %%mm0 \n\t" | |
1854 "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
1855 "movq 1(%0, %%"REG_a"), %%mm2 \n\t" | |
1856 "movq 1(%1, %%"REG_a"), %%mm3 \n\t" | |
1857 "movq -1(%0, %%"REG_a"), %%mm4 \n\t" | |
1858 "movq -1(%1, %%"REG_a"), %%mm5 \n\t" | |
1859 PAVGB" %%mm0, %%mm5 \n\t" | |
1860 PAVGB" %%mm0, %%mm3 \n\t" | |
1861 PAVGB" %%mm0, %%mm5 \n\t" | |
1862 PAVGB" %%mm0, %%mm3 \n\t" | |
1863 PAVGB" %%mm1, %%mm4 \n\t" | |
1864 PAVGB" %%mm1, %%mm2 \n\t" | |
1865 PAVGB" %%mm1, %%mm4 \n\t" | |
1866 PAVGB" %%mm1, %%mm2 \n\t" | |
1867 "movq %%mm5, %%mm7 \n\t" | |
1868 "movq %%mm4, %%mm6 \n\t" | |
1869 "punpcklbw %%mm3, %%mm5 \n\t" | |
1870 "punpckhbw %%mm3, %%mm7 \n\t" | |
1871 "punpcklbw %%mm2, %%mm4 \n\t" | |
1872 "punpckhbw %%mm2, %%mm6 \n\t" | |
18861 | 1873 #if 1 |
29480 | 1874 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" |
1875 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" | |
1876 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" | |
1877 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" | |
18861 | 1878 #else |
29480 | 1879 "movq %%mm5, (%2, %%"REG_a", 2) \n\t" |
1880 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" | |
1881 "movq %%mm4, (%3, %%"REG_a", 2) \n\t" | |
1882 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" | |
18861 | 1883 #endif |
29480 | 1884 "add $8, %%"REG_a" \n\t" |
1885 " js 1b \n\t" | |
1886 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), | |
1887 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), | |
1888 "g" (-mmxSize) | |
1889 : "%"REG_a | |
18861 | 1890 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1891 ); |
18861 | 1892 #else |
28968 | 1893 const x86_reg mmxSize=1; |
18861 | 1894 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1895 dst[0 ]= (3*src[0] + src[srcStride])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1896 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; |
18861 | 1897 |
29481 | 1898 for (x=mmxSize-1; x<srcWidth-1; x++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1899 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1900 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1901 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1902 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1903 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1904 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1905 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; |
18861 | 1906 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1907 dst+=dstStride*2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1908 src+=srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1909 } |
23129 | 1910 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1911 // last line |
18861 | 1912 #if 1 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1913 dst[0]= src[0]; |
23129 | 1914 |
29481 | 1915 for (x=0; x<srcWidth-1; x++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1916 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1917 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1918 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1919 dst[2*srcWidth-1]= src[srcWidth-1]; |
18861 | 1920 #else |
29481 | 1921 for (x=0; x<srcWidth; x++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1922 dst[2*x+0]= |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1923 dst[2*x+1]= src[x]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1924 } |
18861 | 1925 #endif |
1926 | |
28276 | 1927 #if HAVE_MMX |
29480 | 1928 __asm__ volatile(EMMS" \n\t" |
1929 SFENCE" \n\t" | |
1930 :::"memory"); | |
18861 | 1931 #endif |
1932 } | |
1933 | |
/**
 * Split a packed UYVY image into separate Y, U and V planes, writing one
 * chroma line for every two source lines.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line, others are ignored.
 * FIXME: Write HQ version.
 *
 * @param src         packed source; every 4-byte group is read as U, Y, V, Y
 * @param ydst        luma output, two bytes written per 4-byte source group
 * @param udst        U output, one byte per source group, first line of each pair only
 * @param vdst        V output, one byte per source group, first line of each pair only
 * @param width       pixels per line; the chroma width is width>>1
 * @param height      number of lines; lines are consumed two at a time
 * @param lumStride   step added to ydst after each line
 * @param chromStride step added to udst/vdst after each pair of lines
 * @param srcStride   step added to src after each line
 */
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                      long width, long height,
                                      long lumStride, long chromStride, long srcStride)
{
    long y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y+=2) {
#if HAVE_MMX
        /* First line of the pair: extract Y, U and V.
         * REG_a counts chroma samples; each pass consumes 32 source bytes
         * (8 chroma samples, 16 pixels). The exit test sits at the bottom,
         * so the loop body always runs at least once. */
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
            "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
            "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
            "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
            "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
            "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
            "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
            "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
            "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
            "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
            "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)

            MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"

            "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8)
            "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12)
            "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
            "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
            "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
            "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
            "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
            "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
            "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
            "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)

            MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"

            "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
            "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
            "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
            "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
            "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
            "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
            "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
            "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)

            MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
            MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
            : "memory", "%"REG_a
        );

        ydst += lumStride;
        src += srcStride;

        /* Second line of the pair: only the luma bytes are kept, the chroma
         * bytes are shifted out. udst/vdst are passed as operands but are not
         * referenced by this asm. */
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
            "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
            "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
            "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(8)
            "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // UYVY UYVY(12)
            "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
            "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
            "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
            "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
            "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
            "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)

            MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"

            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
            : "memory", "%"REG_a
        );
#else
        long i;
        /* First line of the pair: byte 0 -> U, byte 1 -> Y, byte 2 -> V, byte 3 -> Y. */
        for (i=0; i<chromWidth; i++) {
            udst[i] = src[4*i+0];
            ydst[2*i+0] = src[4*i+1];
            vdst[i] = src[4*i+2];
            ydst[2*i+1] = src[4*i+3];
        }
        ydst += lumStride;
        src += srcStride;

        /* Second line of the pair: luma only, chroma bytes are skipped. */
        for (i=0; i<chromWidth; i++) {
            ydst[2*i+0] = src[4*i+1];
            ydst[2*i+1] = src[4*i+3];
        }
#endif
        /* Advance past the second line; chroma moves one line per line pair. */
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src += srcStride;
    }
#if HAVE_MMX
    /* EMMS/SFENCE are macros defined elsewhere; presumably they leave MMX
     * state and order the MOVNTQ stores before returning -- TODO confirm. */
    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");
#endif
}
2056 | |
2057 /** | |
27158 | 2058 * Height should be a multiple of 2 and width should be a multiple of 2. |
2059 * (If this is a problem for anyone then tell me, and I will fix it.) | |
2060 * Chrominance data is only taken from every second line, | |
25109 | 2061 * others are ignored in the C version. |
2062 * FIXME: Write HQ version. | |
18861 | 2063 */ |
2064 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2065 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2066 long lumStride, long chromStride, long srcStride) |
18861 | 2067 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2068 long y; |
28968 | 2069 const x86_reg chromWidth= width>>1; |
28276 | 2070 #if HAVE_MMX |
29481 | 2071 for (y=0; y<height-2; y+=2) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2072 long i; |
29481 | 2073 for (i=0; i<2; i++) { |
27744 | 2074 __asm__ volatile( |
29480 | 2075 "mov %2, %%"REG_a" \n\t" |
2076 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |
2077 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |
2078 "pxor %%mm7, %%mm7 \n\t" | |
2079 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |
2080 ASMALIGN(4) | |
2081 "1: \n\t" | |
2082 PREFETCH" 64(%0, %%"REG_d") \n\t" | |
2083 "movd (%0, %%"REG_d"), %%mm0 \n\t" | |
2084 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" | |
2085 "punpcklbw %%mm7, %%mm0 \n\t" | |
2086 "punpcklbw %%mm7, %%mm1 \n\t" | |
2087 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" | |
2088 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" | |
2089 "punpcklbw %%mm7, %%mm2 \n\t" | |
2090 "punpcklbw %%mm7, %%mm3 \n\t" | |
2091 "pmaddwd %%mm6, %%mm0 \n\t" | |
2092 "pmaddwd %%mm6, %%mm1 \n\t" | |
2093 "pmaddwd %%mm6, %%mm2 \n\t" | |
2094 "pmaddwd %%mm6, %%mm3 \n\t" | |
18861 | 2095 #ifndef FAST_BGR2YV12 |
29480 | 2096 "psrad $8, %%mm0 \n\t" |
2097 "psrad $8, %%mm1 \n\t" | |
2098 "psrad $8, %%mm2 \n\t" | |
2099 "psrad $8, %%mm3 \n\t" | |
18861 | 2100 #endif |
29480 | 2101 "packssdw %%mm1, %%mm0 \n\t" |
2102 "packssdw %%mm3, %%mm2 \n\t" | |
2103 "pmaddwd %%mm5, %%mm0 \n\t" | |
2104 "pmaddwd %%mm5, %%mm2 \n\t" | |
2105 "packssdw %%mm2, %%mm0 \n\t" | |
2106 "psraw $7, %%mm0 \n\t" | |
18861 | 2107 |
29480 | 2108 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2109 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" | |
2110 "punpcklbw %%mm7, %%mm4 \n\t" | |
2111 "punpcklbw %%mm7, %%mm1 \n\t" | |
2112 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2113 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" | |
2114 "punpcklbw %%mm7, %%mm2 \n\t" | |
2115 "punpcklbw %%mm7, %%mm3 \n\t" | |
2116 "pmaddwd %%mm6, %%mm4 \n\t" | |
2117 "pmaddwd %%mm6, %%mm1 \n\t" | |
2118 "pmaddwd %%mm6, %%mm2 \n\t" | |
2119 "pmaddwd %%mm6, %%mm3 \n\t" | |
18861 | 2120 #ifndef FAST_BGR2YV12 |
29480 | 2121 "psrad $8, %%mm4 \n\t" |
2122 "psrad $8, %%mm1 \n\t" | |
2123 "psrad $8, %%mm2 \n\t" | |
2124 "psrad $8, %%mm3 \n\t" | |
18861 | 2125 #endif |
29480 | 2126 "packssdw %%mm1, %%mm4 \n\t" |
2127 "packssdw %%mm3, %%mm2 \n\t" | |
2128 "pmaddwd %%mm5, %%mm4 \n\t" | |
2129 "pmaddwd %%mm5, %%mm2 \n\t" | |
2130 "add $24, %%"REG_d" \n\t" | |
2131 "packssdw %%mm2, %%mm4 \n\t" | |
2132 "psraw $7, %%mm4 \n\t" | |
18861 | 2133 |
29480 | 2134 "packuswb %%mm4, %%mm0 \n\t" |
2135 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | |
18861 | 2136 |
29480 | 2137 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" |
2138 "add $8, %%"REG_a" \n\t" | |
2139 " js 1b \n\t" | |
2140 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) | |
2141 : "%"REG_a, "%"REG_d | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2142 ); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2143 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2144 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2145 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2146 src -= srcStride*2; |
27744 | 2147 __asm__ volatile( |
29480 | 2148 "mov %4, %%"REG_a" \n\t" |
2149 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |
2150 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |
2151 "pxor %%mm7, %%mm7 \n\t" | |
2152 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |
2153 "add %%"REG_d", %%"REG_d" \n\t" | |
2154 ASMALIGN(4) | |
2155 "1: \n\t" | |
2156 PREFETCH" 64(%0, %%"REG_d") \n\t" | |
2157 PREFETCH" 64(%1, %%"REG_d") \n\t" | |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
2158 #if HAVE_MMX2 || HAVE_AMD3DNOW |
29480 | 2159 "movq (%0, %%"REG_d"), %%mm0 \n\t" |
2160 "movq (%1, %%"REG_d"), %%mm1 \n\t" | |
2161 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" | |
2162 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" | |
2163 PAVGB" %%mm1, %%mm0 \n\t" | |
2164 PAVGB" %%mm3, %%mm2 \n\t" | |
2165 "movq %%mm0, %%mm1 \n\t" | |
2166 "movq %%mm2, %%mm3 \n\t" | |
2167 "psrlq $24, %%mm0 \n\t" | |
2168 "psrlq $24, %%mm2 \n\t" | |
2169 PAVGB" %%mm1, %%mm0 \n\t" | |
2170 PAVGB" %%mm3, %%mm2 \n\t" | |
2171 "punpcklbw %%mm7, %%mm0 \n\t" | |
2172 "punpcklbw %%mm7, %%mm2 \n\t" | |
18861 | 2173 #else |
29480 | 2174 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
2175 "movd (%1, %%"REG_d"), %%mm1 \n\t" | |
2176 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" | |
2177 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" | |
2178 "punpcklbw %%mm7, %%mm0 \n\t" | |
2179 "punpcklbw %%mm7, %%mm1 \n\t" | |
2180 "punpcklbw %%mm7, %%mm2 \n\t" | |
2181 "punpcklbw %%mm7, %%mm3 \n\t" | |
2182 "paddw %%mm1, %%mm0 \n\t" | |
2183 "paddw %%mm3, %%mm2 \n\t" | |
2184 "paddw %%mm2, %%mm0 \n\t" | |
2185 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" | |
2186 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" | |
2187 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" | |
2188 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" | |
2189 "punpcklbw %%mm7, %%mm4 \n\t" | |
2190 "punpcklbw %%mm7, %%mm1 \n\t" | |
2191 "punpcklbw %%mm7, %%mm2 \n\t" | |
2192 "punpcklbw %%mm7, %%mm3 \n\t" | |
2193 "paddw %%mm1, %%mm4 \n\t" | |
2194 "paddw %%mm3, %%mm2 \n\t" | |
2195 "paddw %%mm4, %%mm2 \n\t" | |
2196 "psrlw $2, %%mm0 \n\t" | |
2197 "psrlw $2, %%mm2 \n\t" | |
18861 | 2198 #endif |
29480 | 2199 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" |
2200 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |
18861 | 2201 |
29480 | 2202 "pmaddwd %%mm0, %%mm1 \n\t" |
2203 "pmaddwd %%mm2, %%mm3 \n\t" | |
2204 "pmaddwd %%mm6, %%mm0 \n\t" | |
2205 "pmaddwd %%mm6, %%mm2 \n\t" | |
18861 | 2206 #ifndef FAST_BGR2YV12 |
29480 | 2207 "psrad $8, %%mm0 \n\t" |
2208 "psrad $8, %%mm1 \n\t" | |
2209 "psrad $8, %%mm2 \n\t" | |
2210 "psrad $8, %%mm3 \n\t" | |
18861 | 2211 #endif |
29480 | 2212 "packssdw %%mm2, %%mm0 \n\t" |
2213 "packssdw %%mm3, %%mm1 \n\t" | |
2214 "pmaddwd %%mm5, %%mm0 \n\t" | |
2215 "pmaddwd %%mm5, %%mm1 \n\t" | |
2216 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
2217 "psraw $7, %%mm0 \n\t" | |
18861 | 2218 |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
2219 #if HAVE_MMX2 || HAVE_AMD3DNOW |
29480 | 2220 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
2221 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2222 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2223 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" | |
2224 PAVGB" %%mm1, %%mm4 \n\t" | |
2225 PAVGB" %%mm3, %%mm2 \n\t" | |
2226 "movq %%mm4, %%mm1 \n\t" | |
2227 "movq %%mm2, %%mm3 \n\t" | |
2228 "psrlq $24, %%mm4 \n\t" | |
2229 "psrlq $24, %%mm2 \n\t" | |
2230 PAVGB" %%mm1, %%mm4 \n\t" | |
2231 PAVGB" %%mm3, %%mm2 \n\t" | |
2232 "punpcklbw %%mm7, %%mm4 \n\t" | |
2233 "punpcklbw %%mm7, %%mm2 \n\t" | |
18861 | 2234 #else |
29480 | 2235 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2236 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2237 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" | |
2238 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" | |
2239 "punpcklbw %%mm7, %%mm4 \n\t" | |
2240 "punpcklbw %%mm7, %%mm1 \n\t" | |
2241 "punpcklbw %%mm7, %%mm2 \n\t" | |
2242 "punpcklbw %%mm7, %%mm3 \n\t" | |
2243 "paddw %%mm1, %%mm4 \n\t" | |
2244 "paddw %%mm3, %%mm2 \n\t" | |
2245 "paddw %%mm2, %%mm4 \n\t" | |
2246 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" | |
2247 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" | |
2248 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" | |
2249 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" | |
2250 "punpcklbw %%mm7, %%mm5 \n\t" | |
2251 "punpcklbw %%mm7, %%mm1 \n\t" | |
2252 "punpcklbw %%mm7, %%mm2 \n\t" | |
2253 "punpcklbw %%mm7, %%mm3 \n\t" | |
2254 "paddw %%mm1, %%mm5 \n\t" | |
2255 "paddw %%mm3, %%mm2 \n\t" | |
2256 "paddw %%mm5, %%mm2 \n\t" | |
2257 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |
2258 "psrlw $2, %%mm4 \n\t" | |
2259 "psrlw $2, %%mm2 \n\t" | |
18861 | 2260 #endif |
29480 | 2261 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" |
2262 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |
18861 | 2263 |
29480 | 2264 "pmaddwd %%mm4, %%mm1 \n\t" |
2265 "pmaddwd %%mm2, %%mm3 \n\t" | |
2266 "pmaddwd %%mm6, %%mm4 \n\t" | |
2267 "pmaddwd %%mm6, %%mm2 \n\t" | |
18861 | 2268 #ifndef FAST_BGR2YV12 |
29480 | 2269 "psrad $8, %%mm4 \n\t" |
2270 "psrad $8, %%mm1 \n\t" | |
2271 "psrad $8, %%mm2 \n\t" | |
2272 "psrad $8, %%mm3 \n\t" | |
18861 | 2273 #endif |
29480 | 2274 "packssdw %%mm2, %%mm4 \n\t" |
2275 "packssdw %%mm3, %%mm1 \n\t" | |
2276 "pmaddwd %%mm5, %%mm4 \n\t" | |
2277 "pmaddwd %%mm5, %%mm1 \n\t" | |
2278 "add $24, %%"REG_d" \n\t" | |
2279 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
2280 "psraw $7, %%mm4 \n\t" | |
18861 | 2281 |
29480 | 2282 "movq %%mm0, %%mm1 \n\t" |
2283 "punpckldq %%mm4, %%mm0 \n\t" | |
2284 "punpckhdq %%mm4, %%mm1 \n\t" | |
2285 "packsswb %%mm1, %%mm0 \n\t" | |
2286 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" | |
2287 "movd %%mm0, (%2, %%"REG_a") \n\t" | |
2288 "punpckhdq %%mm0, %%mm0 \n\t" | |
2289 "movd %%mm0, (%3, %%"REG_a") \n\t" | |
2290 "add $4, %%"REG_a" \n\t" | |
2291 " js 1b \n\t" | |
2292 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) | |
2293 : "%"REG_a, "%"REG_d | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2294 ); |
18861 | 2295 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2296 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2297 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2298 src += srcStride*2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2299 } |
18861 | 2300 |
29480 | 2301 __asm__ volatile(EMMS" \n\t" |
2302 SFENCE" \n\t" | |
2303 :::"memory"); | |
18861 | 2304 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2305 y=0; |
18861 | 2306 #endif |
29481 | 2307 for (; y<height; y+=2) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2308 long i; |
29481 | 2309 for (i=0; i<chromWidth; i++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2310 unsigned int b = src[6*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2311 unsigned int g = src[6*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2312 unsigned int r = src[6*i+2]; |
18861 | 2313 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2314 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2315 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2316 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; |
18861 | 2317 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2318 udst[i] = U; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2319 vdst[i] = V; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2320 ydst[2*i] = Y; |
18861 | 2321 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2322 b = src[6*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2323 g = src[6*i+4]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2324 r = src[6*i+5]; |
18861 | 2325 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2326 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2327 ydst[2*i+1] = Y; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2328 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2329 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2330 src += srcStride; |
18861 | 2331 |
29481 | 2332 for (i=0; i<chromWidth; i++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2333 unsigned int b = src[6*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2334 unsigned int g = src[6*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2335 unsigned int r = src[6*i+2]; |
18861 | 2336 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2337 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
18861 | 2338 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2339 ydst[2*i] = Y; |
18861 | 2340 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2341 b = src[6*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2342 g = src[6*i+4]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2343 r = src[6*i+5]; |
18861 | 2344 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2345 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2346 ydst[2*i+1] = Y; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2347 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2348 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2349 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2350 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2351 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2352 } |
18861 | 2353 } |
2354 | |
30264
1032ff2e83f1
Const correctness for src pointer. Remove all constness related warnings in
zuxy
parents:
30211
diff
changeset
|
2355 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2356 long width, long height, long src1Stride, |
29481 | 2357 long src2Stride, long dstStride) |
2358 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2359 long h; |
18861 | 2360 |
29481 | 2361 for (h=0; h < height; h++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2362 long w; |
18861 | 2363 |
28276 | 2364 #if HAVE_MMX |
2365 #if HAVE_SSE2 | |
27744 | 2366 __asm__( |
29480 | 2367 "xor %%"REG_a", %%"REG_a" \n\t" |
2368 "1: \n\t" | |
2369 PREFETCH" 64(%1, %%"REG_a") \n\t" | |
2370 PREFETCH" 64(%2, %%"REG_a") \n\t" | |
2371 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" | |
2372 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" | |
2373 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" | |
2374 "punpcklbw %%xmm2, %%xmm0 \n\t" | |
2375 "punpckhbw %%xmm2, %%xmm1 \n\t" | |
2376 "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" | |
2377 "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" | |
2378 "add $16, %%"REG_a" \n\t" | |
2379 "cmp %3, %%"REG_a" \n\t" | |
2380 " jb 1b \n\t" | |
2381 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | |
2382 : "memory", "%"REG_a"" | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2383 ); |
18861 | 2384 #else |
27744 | 2385 __asm__( |
29480 | 2386 "xor %%"REG_a", %%"REG_a" \n\t" |
2387 "1: \n\t" | |
2388 PREFETCH" 64(%1, %%"REG_a") \n\t" | |
2389 PREFETCH" 64(%2, %%"REG_a") \n\t" | |
2390 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
2391 "movq 8(%1, %%"REG_a"), %%mm2 \n\t" | |
2392 "movq %%mm0, %%mm1 \n\t" | |
2393 "movq %%mm2, %%mm3 \n\t" | |
2394 "movq (%2, %%"REG_a"), %%mm4 \n\t" | |
2395 "movq 8(%2, %%"REG_a"), %%mm5 \n\t" | |
2396 "punpcklbw %%mm4, %%mm0 \n\t" | |
2397 "punpckhbw %%mm4, %%mm1 \n\t" | |
2398 "punpcklbw %%mm5, %%mm2 \n\t" | |
2399 "punpckhbw %%mm5, %%mm3 \n\t" | |
2400 MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" | |
2401 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" | |
2402 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" | |
2403 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" | |
2404 "add $16, %%"REG_a" \n\t" | |
2405 "cmp %3, %%"REG_a" \n\t" | |
2406 " jb 1b \n\t" | |
2407 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | |
2408 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2409 ); |
18861 | 2410 #endif |
29481 | 2411 for (w= (width&(~15)); w < width; w++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2412 dest[2*w+0] = src1[w]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2413 dest[2*w+1] = src2[w]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2414 } |
18861 | 2415 #else |
29481 | 2416 for (w=0; w < width; w++) { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2417 dest[2*w+0] = src1[w]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2418 dest[2*w+1] = src2[w]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2419 } |
18861 | 2420 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2421 dest += dstStride; |
18861 | 2422 src1 += src1Stride; |
2423 src2 += src2Stride; | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2424 } |
28276 | 2425 #if HAVE_MMX |
27744 | 2426 __asm__( |
29480 | 2427 EMMS" \n\t" |
2428 SFENCE" \n\t" | |
2429 ::: "memory" | |
2430 ); | |
18861 | 2431 #endif |
2432 } | |
2433 | |
2434 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2435 uint8_t *dst1, uint8_t *dst2, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2436 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2437 long srcStride1, long srcStride2, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2438 long dstStride1, long dstStride2) |
18861 | 2439 { |
28968 | 2440 x86_reg y; |
2441 long x,w,h; | |
18861 | 2442 w=width/2; h=height/2; |
28276 | 2443 #if HAVE_MMX |
27744 | 2444 __asm__ volatile( |
29480 | 2445 PREFETCH" %0 \n\t" |
2446 PREFETCH" %1 \n\t" | |
2447 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); | |
18861 | 2448 #endif |
29481 | 2449 for (y=0;y<h;y++) { |
29480 | 2450 const uint8_t* s1=src1+srcStride1*(y>>1); |
2451 uint8_t* d=dst1+dstStride1*y; | |
2452 x=0; | |
28276 | 2453 #if HAVE_MMX |
29481 | 2454 for (;x<w-31;x+=32) { |
29480 | 2455 __asm__ volatile( |
2456 PREFETCH" 32%1 \n\t" | |
2457 "movq %1, %%mm0 \n\t" | |
2458 "movq 8%1, %%mm2 \n\t" | |
2459 "movq 16%1, %%mm4 \n\t" | |
2460 "movq 24%1, %%mm6 \n\t" | |
2461 "movq %%mm0, %%mm1 \n\t" | |
2462 "movq %%mm2, %%mm3 \n\t" | |
2463 "movq %%mm4, %%mm5 \n\t" | |
2464 "movq %%mm6, %%mm7 \n\t" | |
2465 "punpcklbw %%mm0, %%mm0 \n\t" | |
2466 "punpckhbw %%mm1, %%mm1 \n\t" | |
2467 "punpcklbw %%mm2, %%mm2 \n\t" | |
2468 "punpckhbw %%mm3, %%mm3 \n\t" | |
2469 "punpcklbw %%mm4, %%mm4 \n\t" | |
2470 "punpckhbw %%mm5, %%mm5 \n\t" | |
2471 "punpcklbw %%mm6, %%mm6 \n\t" | |
2472 "punpckhbw %%mm7, %%mm7 \n\t" | |
2473 MOVNTQ" %%mm0, %0 \n\t" | |
2474 MOVNTQ" %%mm1, 8%0 \n\t" | |
2475 MOVNTQ" %%mm2, 16%0 \n\t" | |
2476 MOVNTQ" %%mm3, 24%0 \n\t" | |
2477 MOVNTQ" %%mm4, 32%0 \n\t" | |
2478 MOVNTQ" %%mm5, 40%0 \n\t" | |
2479 MOVNTQ" %%mm6, 48%0 \n\t" | |
2480 MOVNTQ" %%mm7, 56%0" | |
2481 :"=m"(d[2*x]) | |
2482 :"m"(s1[x]) | |
2483 :"memory"); | |
2484 } | |
18861 | 2485 #endif |
29480 | 2486 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; |
18861 | 2487 } |
29481 | 2488 for (y=0;y<h;y++) { |
29480 | 2489 const uint8_t* s2=src2+srcStride2*(y>>1); |
2490 uint8_t* d=dst2+dstStride2*y; | |
2491 x=0; | |
28276 | 2492 #if HAVE_MMX |
29481 | 2493 for (;x<w-31;x+=32) { |
29480 | 2494 __asm__ volatile( |
2495 PREFETCH" 32%1 \n\t" | |
2496 "movq %1, %%mm0 \n\t" | |
2497 "movq 8%1, %%mm2 \n\t" | |
2498 "movq 16%1, %%mm4 \n\t" | |
2499 "movq 24%1, %%mm6 \n\t" | |
2500 "movq %%mm0, %%mm1 \n\t" | |
2501 "movq %%mm2, %%mm3 \n\t" | |
2502 "movq %%mm4, %%mm5 \n\t" | |
2503 "movq %%mm6, %%mm7 \n\t" | |
2504 "punpcklbw %%mm0, %%mm0 \n\t" | |
2505 "punpckhbw %%mm1, %%mm1 \n\t" | |
2506 "punpcklbw %%mm2, %%mm2 \n\t" | |
2507 "punpckhbw %%mm3, %%mm3 \n\t" | |
2508 "punpcklbw %%mm4, %%mm4 \n\t" | |
2509 "punpckhbw %%mm5, %%mm5 \n\t" | |
2510 "punpcklbw %%mm6, %%mm6 \n\t" | |
2511 "punpckhbw %%mm7, %%mm7 \n\t" | |
2512 MOVNTQ" %%mm0, %0 \n\t" | |
2513 MOVNTQ" %%mm1, 8%0 \n\t" | |
2514 MOVNTQ" %%mm2, 16%0 \n\t" | |
2515 MOVNTQ" %%mm3, 24%0 \n\t" | |
2516 MOVNTQ" %%mm4, 32%0 \n\t" | |
2517 MOVNTQ" %%mm5, 40%0 \n\t" | |
2518 MOVNTQ" %%mm6, 48%0 \n\t" | |
2519 MOVNTQ" %%mm7, 56%0" | |
2520 :"=m"(d[2*x]) | |
2521 :"m"(s2[x]) | |
2522 :"memory"); | |
2523 } | |
18861 | 2524 #endif |
29480 | 2525 for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |
18861 | 2526 } |
28276 | 2527 #if HAVE_MMX |
27744 | 2528 __asm__( |
29480 | 2529 EMMS" \n\t" |
2530 SFENCE" \n\t" | |
2531 ::: "memory" | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2532 ); |
18861 | 2533 #endif |
2534 } | |
2535 | |
2536 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2537 uint8_t *dst, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2538 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2539 long srcStride1, long srcStride2, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2540 long srcStride3, long dstStride) |
18861 | 2541 { |
28968 | 2542 x86_reg x; |
28957 | 2543 long y,w,h; |
18861 | 2544 w=width/2; h=height; |
29481 | 2545 for (y=0;y<h;y++) { |
29480 | 2546 const uint8_t* yp=src1+srcStride1*y; |
2547 const uint8_t* up=src2+srcStride2*(y>>2); | |
2548 const uint8_t* vp=src3+srcStride3*(y>>2); | |
2549 uint8_t* d=dst+dstStride*y; | |
2550 x=0; | |
28276 | 2551 #if HAVE_MMX |
29481 | 2552 for (;x<w-7;x+=8) { |
29480 | 2553 __asm__ volatile( |
2554 PREFETCH" 32(%1, %0) \n\t" | |
2555 PREFETCH" 32(%2, %0) \n\t" | |
2556 PREFETCH" 32(%3, %0) \n\t" | |
2557 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | |
2558 "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */ | |
2559 "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */ | |
2560 "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | |
2561 "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */ | |
2562 "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */ | |
2563 "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */ | |
2564 "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */ | |
2565 "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */ | |
2566 "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */ | |
18861 | 2567 |
29480 | 2568 "movq %%mm1, %%mm6 \n\t" |
2569 "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/ | |
2570 "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ | |
2571 "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ | |
2572 MOVNTQ" %%mm0, (%4, %0, 8) \n\t" | |
2573 MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t" | |
23129 | 2574 |
29480 | 2575 "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/ |
2576 "movq 8(%1, %0, 4), %%mm0 \n\t" | |
2577 "movq %%mm0, %%mm3 \n\t" | |
2578 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/ | |
2579 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/ | |
2580 MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t" | |
2581 MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t" | |
18861 | 2582 |
29480 | 2583 "movq %%mm4, %%mm6 \n\t" |
2584 "movq 16(%1, %0, 4), %%mm0 \n\t" | |
2585 "movq %%mm0, %%mm3 \n\t" | |
2586 "punpcklbw %%mm5, %%mm4 \n\t" | |
2587 "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/ | |
2588 "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/ | |
2589 MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t" | |
2590 MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t" | |
23129 | 2591 |
29480 | 2592 "punpckhbw %%mm5, %%mm6 \n\t" |
2593 "movq 24(%1, %0, 4), %%mm0 \n\t" | |
2594 "movq %%mm0, %%mm3 \n\t" | |
2595 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/ | |
2596 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/ | |
2597 MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t" | |
2598 MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t" | |
18861 | 2599 |
29480 | 2600 : "+r" (x) |
2601 : "r"(yp), "r" (up), "r"(vp), "r"(d) | |
2602 :"memory"); | |
2603 } | |
18861 | 2604 #endif |
29481 | 2605 for (; x<w; x++) { |
29480 | 2606 const long x2 = x<<2; |
2607 d[8*x+0] = yp[x2]; | |
2608 d[8*x+1] = up[x]; | |
2609 d[8*x+2] = yp[x2+1]; | |
2610 d[8*x+3] = vp[x]; | |
2611 d[8*x+4] = yp[x2+2]; | |
2612 d[8*x+5] = up[x]; | |
2613 d[8*x+6] = yp[x2+3]; | |
2614 d[8*x+7] = vp[x]; | |
2615 } | |
18861 | 2616 } |
28276 | 2617 #if HAVE_MMX |
27744 | 2618 __asm__( |
29480 | 2619 EMMS" \n\t" |
2620 SFENCE" \n\t" | |
2621 ::: "memory" | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2622 ); |
18861 | 2623 #endif |
2624 } | |
22960 | 2625 |
28962 | 2626 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) |
2627 { | |
2628 dst += count; | |
2629 src += 2*count; | |
2630 count= - count; | |
2631 | |
2632 #if HAVE_MMX | |
29481 | 2633 if(count <= -16) { |
28962 | 2634 count += 15; |
2635 __asm__ volatile( | |
2636 "pcmpeqw %%mm7, %%mm7 \n\t" | |
2637 "psrlw $8, %%mm7 \n\t" | |
2638 "1: \n\t" | |
2639 "movq -30(%1, %0, 2), %%mm0 \n\t" | |
2640 "movq -22(%1, %0, 2), %%mm1 \n\t" | |
2641 "movq -14(%1, %0, 2), %%mm2 \n\t" | |
2642 "movq -6(%1, %0, 2), %%mm3 \n\t" | |
2643 "pand %%mm7, %%mm0 \n\t" | |
2644 "pand %%mm7, %%mm1 \n\t" | |
2645 "pand %%mm7, %%mm2 \n\t" | |
2646 "pand %%mm7, %%mm3 \n\t" | |
2647 "packuswb %%mm1, %%mm0 \n\t" | |
2648 "packuswb %%mm3, %%mm2 \n\t" | |
2649 MOVNTQ" %%mm0,-15(%2, %0) \n\t" | |
2650 MOVNTQ" %%mm2,- 7(%2, %0) \n\t" | |
2651 "add $16, %0 \n\t" | |
2652 " js 1b \n\t" | |
2653 : "+r"(count) | |
2654 : "r"(src), "r"(dst) | |
2655 ); | |
2656 count -= 15; | |
2657 } | |
2658 #endif | |
29481 | 2659 while(count<0) { |
28962 | 2660 dst[count]= src[2*count]; |
2661 count++; | |
2662 } | |
2663 } | |
2664 | |
2665 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) | |
2666 { | |
2667 dst0+= count; | |
2668 dst1+= count; | |
2669 src += 4*count; | |
2670 count= - count; | |
2671 #if HAVE_MMX | |
29481 | 2672 if(count <= -8) { |
28962 | 2673 count += 7; |
2674 __asm__ volatile( | |
2675 "pcmpeqw %%mm7, %%mm7 \n\t" | |
2676 "psrlw $8, %%mm7 \n\t" | |
2677 "1: \n\t" | |
2678 "movq -28(%1, %0, 4), %%mm0 \n\t" | |
2679 "movq -20(%1, %0, 4), %%mm1 \n\t" | |
2680 "movq -12(%1, %0, 4), %%mm2 \n\t" | |
2681 "movq -4(%1, %0, 4), %%mm3 \n\t" | |
2682 "pand %%mm7, %%mm0 \n\t" | |
2683 "pand %%mm7, %%mm1 \n\t" | |
2684 "pand %%mm7, %%mm2 \n\t" | |
2685 "pand %%mm7, %%mm3 \n\t" | |
2686 "packuswb %%mm1, %%mm0 \n\t" | |
2687 "packuswb %%mm3, %%mm2 \n\t" | |
2688 "movq %%mm0, %%mm1 \n\t" | |
2689 "movq %%mm2, %%mm3 \n\t" | |
2690 "psrlw $8, %%mm0 \n\t" | |
2691 "psrlw $8, %%mm2 \n\t" | |
2692 "pand %%mm7, %%mm1 \n\t" | |
2693 "pand %%mm7, %%mm3 \n\t" | |
2694 "packuswb %%mm2, %%mm0 \n\t" | |
2695 "packuswb %%mm3, %%mm1 \n\t" | |
2696 MOVNTQ" %%mm0,- 7(%3, %0) \n\t" | |
2697 MOVNTQ" %%mm1,- 7(%2, %0) \n\t" | |
2698 "add $8, %0 \n\t" | |
2699 " js 1b \n\t" | |
2700 : "+r"(count) | |
2701 : "r"(src), "r"(dst0), "r"(dst1) | |
2702 ); | |
2703 count -= 7; | |
2704 } | |
2705 #endif | |
29481 | 2706 while(count<0) { |
28962 | 2707 dst0[count]= src[4*count+0]; |
2708 dst1[count]= src[4*count+2]; | |
2709 count++; | |
2710 } | |
2711 } | |
2712 | |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2713 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2714 { |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2715 dst0 += count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2716 dst1 += count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2717 src0 += 4*count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2718 src1 += 4*count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2719 count= - count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2720 #ifdef PAVGB |
29481 | 2721 if(count <= -8) { |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2722 count += 7; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2723 __asm__ volatile( |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2724 "pcmpeqw %%mm7, %%mm7 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2725 "psrlw $8, %%mm7 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2726 "1: \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2727 "movq -28(%1, %0, 4), %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2728 "movq -20(%1, %0, 4), %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2729 "movq -12(%1, %0, 4), %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2730 "movq -4(%1, %0, 4), %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2731 PAVGB" -28(%2, %0, 4), %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2732 PAVGB" -20(%2, %0, 4), %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2733 PAVGB" -12(%2, %0, 4), %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2734 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2735 "pand %%mm7, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2736 "pand %%mm7, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2737 "pand %%mm7, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2738 "pand %%mm7, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2739 "packuswb %%mm1, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2740 "packuswb %%mm3, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2741 "movq %%mm0, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2742 "movq %%mm2, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2743 "psrlw $8, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2744 "psrlw $8, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2745 "pand %%mm7, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2746 "pand %%mm7, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2747 "packuswb %%mm2, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2748 "packuswb %%mm3, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2749 MOVNTQ" %%mm0,- 7(%4, %0) \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2750 MOVNTQ" %%mm1,- 7(%3, %0) \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2751 "add $8, %0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2752 " js 1b \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2753 : "+r"(count) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2754 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2755 ); |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2756 count -= 7; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2757 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2758 #endif |
29481 | 2759 while(count<0) { |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2760 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2761 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2762 count++; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2763 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2764 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2765 |
28962 | 2766 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
2767 { | |
2768 dst0+= count; | |
2769 dst1+= count; | |
2770 src += 4*count; | |
2771 count= - count; | |
2772 #if HAVE_MMX | |
29481 | 2773 if(count <= -8) { |
28962 | 2774 count += 7; |
2775 __asm__ volatile( | |
2776 "pcmpeqw %%mm7, %%mm7 \n\t" | |
2777 "psrlw $8, %%mm7 \n\t" | |
2778 "1: \n\t" | |
2779 "movq -28(%1, %0, 4), %%mm0 \n\t" | |
2780 "movq -20(%1, %0, 4), %%mm1 \n\t" | |
2781 "movq -12(%1, %0, 4), %%mm2 \n\t" | |
2782 "movq -4(%1, %0, 4), %%mm3 \n\t" | |
2783 "psrlw $8, %%mm0 \n\t" | |
2784 "psrlw $8, %%mm1 \n\t" | |
2785 "psrlw $8, %%mm2 \n\t" | |
2786 "psrlw $8, %%mm3 \n\t" | |
2787 "packuswb %%mm1, %%mm0 \n\t" | |
2788 "packuswb %%mm3, %%mm2 \n\t" | |
2789 "movq %%mm0, %%mm1 \n\t" | |
2790 "movq %%mm2, %%mm3 \n\t" | |
2791 "psrlw $8, %%mm0 \n\t" | |
2792 "psrlw $8, %%mm2 \n\t" | |
2793 "pand %%mm7, %%mm1 \n\t" | |
2794 "pand %%mm7, %%mm3 \n\t" | |
2795 "packuswb %%mm2, %%mm0 \n\t" | |
2796 "packuswb %%mm3, %%mm1 \n\t" | |
2797 MOVNTQ" %%mm0,- 7(%3, %0) \n\t" | |
2798 MOVNTQ" %%mm1,- 7(%2, %0) \n\t" | |
2799 "add $8, %0 \n\t" | |
2800 " js 1b \n\t" | |
2801 : "+r"(count) | |
2802 : "r"(src), "r"(dst0), "r"(dst1) | |
2803 ); | |
2804 count -= 7; | |
2805 } | |
2806 #endif | |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2807 src++; |
29481 | 2808 while(count<0) { |
28962 | 2809 dst0[count]= src[4*count+0]; |
2810 dst1[count]= src[4*count+2]; | |
2811 count++; | |
2812 } | |
2813 } | |
2814 | |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2815 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2816 { |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2817 dst0 += count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2818 dst1 += count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2819 src0 += 4*count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2820 src1 += 4*count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2821 count= - count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2822 #ifdef PAVGB |
29481 | 2823 if(count <= -8) { |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2824 count += 7; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2825 __asm__ volatile( |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2826 "pcmpeqw %%mm7, %%mm7 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2827 "psrlw $8, %%mm7 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2828 "1: \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2829 "movq -28(%1, %0, 4), %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2830 "movq -20(%1, %0, 4), %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2831 "movq -12(%1, %0, 4), %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2832 "movq -4(%1, %0, 4), %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2833 PAVGB" -28(%2, %0, 4), %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2834 PAVGB" -20(%2, %0, 4), %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2835 PAVGB" -12(%2, %0, 4), %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2836 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2837 "psrlw $8, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2838 "psrlw $8, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2839 "psrlw $8, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2840 "psrlw $8, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2841 "packuswb %%mm1, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2842 "packuswb %%mm3, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2843 "movq %%mm0, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2844 "movq %%mm2, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2845 "psrlw $8, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2846 "psrlw $8, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2847 "pand %%mm7, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2848 "pand %%mm7, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2849 "packuswb %%mm2, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2850 "packuswb %%mm3, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2851 MOVNTQ" %%mm0,- 7(%4, %0) \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2852 MOVNTQ" %%mm1,- 7(%3, %0) \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2853 "add $8, %0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2854 " js 1b \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2855 : "+r"(count) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2856 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2857 ); |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2858 count -= 7; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2859 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2860 #endif |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2861 src0++; |
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2862 src1++; |
29481 | 2863 while(count<0) { |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2864 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2865 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2866 count++; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2867 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2868 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2869 |
28962 | 2870 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
2871 long width, long height, | |
2872 long lumStride, long chromStride, long srcStride) | |
2873 { | |
2874 long y; | |
2875 const long chromWidth= -((-width)>>1); | |
2876 | |
29481 | 2877 for (y=0; y<height; y++) { |
28962 | 2878 RENAME(extract_even)(src, ydst, width); |
29481 | 2879 if(y&1) { |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2880 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); |
28962 | 2881 udst+= chromStride; |
2882 vdst+= chromStride; | |
2883 } | |
2884 | |
2885 src += srcStride; | |
2886 ydst+= lumStride; | |
2887 } | |
2888 #if HAVE_MMX | |
2889 __asm__( | |
29480 | 2890 EMMS" \n\t" |
2891 SFENCE" \n\t" | |
2892 ::: "memory" | |
28962 | 2893 ); |
2894 #endif | |
2895 } | |
2896 | |
2897 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | |
2898 long width, long height, | |
2899 long lumStride, long chromStride, long srcStride) | |
2900 { | |
2901 long y; | |
2902 const long chromWidth= -((-width)>>1); | |
2903 | |
29481 | 2904 for (y=0; y<height; y++) { |
28962 | 2905 RENAME(extract_even)(src, ydst, width); |
2906 RENAME(extract_odd2)(src, udst, vdst, chromWidth); | |
2907 | |
2908 src += srcStride; | |
2909 ydst+= lumStride; | |
2910 udst+= chromStride; | |
2911 vdst+= chromStride; | |
2912 } | |
2913 #if HAVE_MMX | |
2914 __asm__( | |
29480 | 2915 EMMS" \n\t" |
2916 SFENCE" \n\t" | |
2917 ::: "memory" | |
28962 | 2918 ); |
2919 #endif | |
2920 } | |
2921 | |
2922 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | |
2923 long width, long height, | |
2924 long lumStride, long chromStride, long srcStride) | |
2925 { | |
2926 long y; | |
2927 const long chromWidth= -((-width)>>1); | |
2928 | |
29481 | 2929 for (y=0; y<height; y++) { |
28962 | 2930 RENAME(extract_even)(src+1, ydst, width); |
29481 | 2931 if(y&1) { |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2932 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); |
28962 | 2933 udst+= chromStride; |
2934 vdst+= chromStride; | |
2935 } | |
2936 | |
2937 src += srcStride; | |
2938 ydst+= lumStride; | |
2939 } | |
2940 #if HAVE_MMX | |
2941 __asm__( | |
29480 | 2942 EMMS" \n\t" |
2943 SFENCE" \n\t" | |
2944 ::: "memory" | |
28962 | 2945 ); |
2946 #endif | |
2947 } | |
2948 | |
2949 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | |
2950 long width, long height, | |
2951 long lumStride, long chromStride, long srcStride) | |
2952 { | |
2953 long y; | |
2954 const long chromWidth= -((-width)>>1); | |
2955 | |
29481 | 2956 for (y=0; y<height; y++) { |
28962 | 2957 RENAME(extract_even)(src+1, ydst, width); |
2958 RENAME(extract_even2)(src, udst, vdst, chromWidth); | |
2959 | |
2960 src += srcStride; | |
2961 ydst+= lumStride; | |
2962 udst+= chromStride; | |
2963 vdst+= chromStride; | |
2964 } | |
2965 #if HAVE_MMX | |
2966 __asm__( | |
29480 | 2967 EMMS" \n\t" |
2968 SFENCE" \n\t" | |
2969 ::: "memory" | |
28962 | 2970 ); |
2971 #endif | |
2972 } | |
2973 | |
29481 | 2974 static inline void RENAME(rgb2rgb_init)(void) |
2975 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2976 rgb15to16 = RENAME(rgb15to16); |
27486 | 2977 rgb15tobgr24 = RENAME(rgb15tobgr24); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2978 rgb15to32 = RENAME(rgb15to32); |
27486 | 2979 rgb16tobgr24 = RENAME(rgb16tobgr24); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2980 rgb16to32 = RENAME(rgb16to32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2981 rgb16to15 = RENAME(rgb16to15); |
27486 | 2982 rgb24tobgr16 = RENAME(rgb24tobgr16); |
2983 rgb24tobgr15 = RENAME(rgb24tobgr15); | |
2984 rgb24tobgr32 = RENAME(rgb24tobgr32); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2985 rgb32to16 = RENAME(rgb32to16); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2986 rgb32to15 = RENAME(rgb32to15); |
27486 | 2987 rgb32tobgr24 = RENAME(rgb32tobgr24); |
2988 rgb24to15 = RENAME(rgb24to15); | |
2989 rgb24to16 = RENAME(rgb24to16); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2990 rgb24tobgr24 = RENAME(rgb24tobgr24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2991 rgb32tobgr32 = RENAME(rgb32tobgr32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2992 rgb32tobgr16 = RENAME(rgb32tobgr16); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2993 rgb32tobgr15 = RENAME(rgb32tobgr15); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2994 yv12toyuy2 = RENAME(yv12toyuy2); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2995 yv12touyvy = RENAME(yv12touyvy); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2996 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); |
27495 | 2997 yuv422ptouyvy = RENAME(yuv422ptouyvy); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2998 yuy2toyv12 = RENAME(yuy2toyv12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2999 // yvu9toyv12 = RENAME(yvu9toyv12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3000 planar2x = RENAME(planar2x); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3001 rgb24toyv12 = RENAME(rgb24toyv12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3002 interleaveBytes = RENAME(interleaveBytes); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3003 vu9_to_vu12 = RENAME(vu9_to_vu12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3004 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); |
28962 | 3005 |
3006 uyvytoyuv420 = RENAME(uyvytoyuv420); | |
3007 uyvytoyuv422 = RENAME(uyvytoyuv422); | |
3008 yuyvtoyuv420 = RENAME(yuyvtoyuv420); | |
3009 yuyvtoyuv422 = RENAME(yuyvtoyuv422); | |
22960 | 3010 } |