Mercurial > mplayer.hg
annotate libswscale/rgb2rgb_template.c @ 29480:a4d8dee13834
Indent libswscale:
- Use 4 spaces throughout for indentation;
- Fix inconsistent indentation;
- Indent function calls and declarations, aligning arguments on multiple lines
to the column after the opening parenthesis;
- Align asm code to the column 4 spaces after the call to __asm__();
- Align cases in switch statements to the same column as "switch".
author | ramiro |
---|---|
date | Sun, 16 Aug 2009 00:32:04 +0000 |
parents | 0673fad0546f |
children | c080f1f5c07e |
rev | line source |
---|---|
18861 | 1 /* |
27158 | 2 * software RGB to RGB converter |
3 * pluralize by software PAL8 to RGB converter | |
4 * software YUV to YUV converter | |
5 * software YUV to RGB converter | |
6 * Written by Nick Kurshev. | |
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | |
8 * lots of big-endian byte order fixes by Alex Beregszaszi | |
19703
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
9 * |
20094
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
10 * This file is part of FFmpeg. |
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
11 * |
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
12 * FFmpeg is free software; you can redistribute it and/or modify |
19703
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
13 * it under the terms of the GNU General Public License as published by |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
14 * the Free Software Foundation; either version 2 of the License, or |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
15 * (at your option) any later version. |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
16 * |
20094
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
17 * FFmpeg is distributed in the hope that it will be useful, |
19703
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
20 * GNU General Public License for more details. |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
21 * |
ad7f49a1ba95
Add official GPL header to make license explicit as discussed on ffmpeg-devel.
diego
parents:
19396
diff
changeset
|
22 * You should have received a copy of the GNU General Public License |
20094
aca9e9783f67
Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents:
19703
diff
changeset
|
23 * along with FFmpeg; if not, write to the Free Software |
23702 | 24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
23129 | 25 * |
27158 | 26 * The C code (not assembly, MMX, ...) of this file can be used |
25109 | 27 * under the LGPL license. |
18861 | 28 */ |
29 | |
30 #include <stddef.h> | |
31 | |
32 #undef PREFETCH | |
33 #undef MOVNTQ | |
34 #undef EMMS | |
35 #undef SFENCE | |
36 #undef MMREG_SIZE | |
37 #undef PREFETCHW | |
38 #undef PAVGB | |
39 | |
28276 | 40 #if HAVE_SSE2 |
18861 | 41 #define MMREG_SIZE 16 |
42 #else | |
43 #define MMREG_SIZE 8 | |
44 #endif | |
45 | |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
46 #if HAVE_AMD3DNOW |
18861 | 47 #define PREFETCH "prefetch" |
48 #define PREFETCHW "prefetchw" | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
49 #define PAVGB "pavgusb" |
28276 | 50 #elif HAVE_MMX2 |
18861 | 51 #define PREFETCH "prefetchnta" |
52 #define PREFETCHW "prefetcht0" | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
53 #define PAVGB "pavgb" |
18861 | 54 #else |
20724
b8fe18a742ce
Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents:
20094
diff
changeset
|
55 #define PREFETCH " # nop" |
b8fe18a742ce
Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents:
20094
diff
changeset
|
56 #define PREFETCHW " # nop" |
18861 | 57 #endif |
58 | |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
59 #if HAVE_AMD3DNOW |
27782 | 60 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ |
18861 | 61 #define EMMS "femms" |
62 #else | |
63 #define EMMS "emms" | |
64 #endif | |
65 | |
28276 | 66 #if HAVE_MMX2 |
18861 | 67 #define MOVNTQ "movntq" |
68 #define SFENCE "sfence" | |
69 #else | |
70 #define MOVNTQ "movq" | |
20724
b8fe18a742ce
Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents:
20094
diff
changeset
|
71 #define SFENCE " # nop" |
18861 | 72 #endif |
73 | |
27486 | 74 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 75 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
76 uint8_t *dest = dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
77 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
78 const uint8_t *end; |
28276 | 79 #if HAVE_MMX |
29480 | 80 const uint8_t *mm_end; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
81 #endif |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
82 end = s + src_size; |
28276 | 83 #if HAVE_MMX |
29480 | 84 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
85 mm_end = end - 23; | |
86 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); | |
87 while (s < mm_end) | |
88 { | |
89 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
90 PREFETCH" 32%1 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
91 "movd %1, %%mm0 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
92 "punpckldq 3%1, %%mm0 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
93 "movd 6%1, %%mm1 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
94 "punpckldq 9%1, %%mm1 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
95 "movd 12%1, %%mm2 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
96 "punpckldq 15%1, %%mm2 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
97 "movd 18%1, %%mm3 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
98 "punpckldq 21%1, %%mm3 \n\t" |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
99 "por %%mm7, %%mm0 \n\t" |
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
100 "por %%mm7, %%mm1 \n\t" |
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
101 "por %%mm7, %%mm2 \n\t" |
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
102 "por %%mm7, %%mm3 \n\t" |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
103 MOVNTQ" %%mm0, %0 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
104 MOVNTQ" %%mm1, 8%0 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
105 MOVNTQ" %%mm2, 16%0 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
106 MOVNTQ" %%mm3, 24%0" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
107 :"=m"(*dest) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
108 :"m"(*s) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
109 :"memory"); |
29480 | 110 dest += 32; |
111 s += 24; | |
112 } | |
113 __asm__ volatile(SFENCE:::"memory"); | |
114 __asm__ volatile(EMMS:::"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
115 #endif |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
116 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
117 { |
29397 | 118 #if HAVE_BIGENDIAN |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
119 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
120 *dest++ = 255; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
121 *dest++ = s[2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
122 *dest++ = s[1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
123 *dest++ = s[0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
124 s+=3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
125 #else |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
126 *dest++ = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
127 *dest++ = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
128 *dest++ = *s++; |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
129 *dest++ = 255; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
130 #endif |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
131 } |
18861 | 132 } |
133 | |
27486 | 134 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 135 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
136 uint8_t *dest = dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
137 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
138 const uint8_t *end; |
28276 | 139 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
140 const uint8_t *mm_end; |
18861 | 141 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
142 end = s + src_size; |
28276 | 143 #if HAVE_MMX |
27744 | 144 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
145 mm_end = end - 31; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
146 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
147 { |
27744 | 148 __asm__ volatile( |
29480 | 149 PREFETCH" 32%1 \n\t" |
150 "movq %1, %%mm0 \n\t" | |
151 "movq 8%1, %%mm1 \n\t" | |
152 "movq 16%1, %%mm4 \n\t" | |
153 "movq 24%1, %%mm5 \n\t" | |
154 "movq %%mm0, %%mm2 \n\t" | |
155 "movq %%mm1, %%mm3 \n\t" | |
156 "movq %%mm4, %%mm6 \n\t" | |
157 "movq %%mm5, %%mm7 \n\t" | |
158 "psrlq $8, %%mm2 \n\t" | |
159 "psrlq $8, %%mm3 \n\t" | |
160 "psrlq $8, %%mm6 \n\t" | |
161 "psrlq $8, %%mm7 \n\t" | |
162 "pand %2, %%mm0 \n\t" | |
163 "pand %2, %%mm1 \n\t" | |
164 "pand %2, %%mm4 \n\t" | |
165 "pand %2, %%mm5 \n\t" | |
166 "pand %3, %%mm2 \n\t" | |
167 "pand %3, %%mm3 \n\t" | |
168 "pand %3, %%mm6 \n\t" | |
169 "pand %3, %%mm7 \n\t" | |
170 "por %%mm2, %%mm0 \n\t" | |
171 "por %%mm3, %%mm1 \n\t" | |
172 "por %%mm6, %%mm4 \n\t" | |
173 "por %%mm7, %%mm5 \n\t" | |
18861 | 174 |
29480 | 175 "movq %%mm1, %%mm2 \n\t" |
176 "movq %%mm4, %%mm3 \n\t" | |
177 "psllq $48, %%mm2 \n\t" | |
178 "psllq $32, %%mm3 \n\t" | |
179 "pand %4, %%mm2 \n\t" | |
180 "pand %5, %%mm3 \n\t" | |
181 "por %%mm2, %%mm0 \n\t" | |
182 "psrlq $16, %%mm1 \n\t" | |
183 "psrlq $32, %%mm4 \n\t" | |
184 "psllq $16, %%mm5 \n\t" | |
185 "por %%mm3, %%mm1 \n\t" | |
186 "pand %6, %%mm5 \n\t" | |
187 "por %%mm5, %%mm4 \n\t" | |
18861 | 188 |
29480 | 189 MOVNTQ" %%mm0, %0 \n\t" |
190 MOVNTQ" %%mm1, 8%0 \n\t" | |
191 MOVNTQ" %%mm4, 16%0" | |
192 :"=m"(*dest) | |
193 :"m"(*s),"m"(mask24l), | |
194 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
195 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
196 dest += 24; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
197 s += 32; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
198 } |
27744 | 199 __asm__ volatile(SFENCE:::"memory"); |
200 __asm__ volatile(EMMS:::"memory"); | |
18861 | 201 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
202 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
203 { |
29397 | 204 #if HAVE_BIGENDIAN |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
205 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
206 s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
207 dest[2] = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
208 dest[1] = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
209 dest[0] = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
210 dest += 3; |
18861 | 211 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
212 *dest++ = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
213 *dest++ = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
214 *dest++ = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
215 s++; |
18861 | 216 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
217 } |
18861 | 218 } |
219 | |
220 /* | |
27158 | 221 original by Strepto/Astral |
222 ported to gcc & bugfixed: A'rpi | |
18861 | 223 MMX2, 3DNOW optimization by Nick Kurshev |
27158 | 224 32-bit C version, and the and&add trick by Michael Niedermayer |
18861 | 225 */ |
25750 | 226 static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 227 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
228 register const uint8_t* s=src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
229 register uint8_t* d=dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
230 register const uint8_t *end; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
231 const uint8_t *mm_end; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
232 end = s + src_size; |
28276 | 233 #if HAVE_MMX |
27744 | 234 __asm__ volatile(PREFETCH" %0"::"m"(*s)); |
235 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
236 mm_end = end - 15; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
237 while (s<mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
238 { |
27744 | 239 __asm__ volatile( |
29480 | 240 PREFETCH" 32%1 \n\t" |
241 "movq %1, %%mm0 \n\t" | |
242 "movq 8%1, %%mm2 \n\t" | |
243 "movq %%mm0, %%mm1 \n\t" | |
244 "movq %%mm2, %%mm3 \n\t" | |
245 "pand %%mm4, %%mm0 \n\t" | |
246 "pand %%mm4, %%mm2 \n\t" | |
247 "paddw %%mm1, %%mm0 \n\t" | |
248 "paddw %%mm3, %%mm2 \n\t" | |
249 MOVNTQ" %%mm0, %0 \n\t" | |
250 MOVNTQ" %%mm2, 8%0" | |
251 :"=m"(*d) | |
252 :"m"(*s) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
253 ); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
254 d+=16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
255 s+=16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
256 } |
27744 | 257 __asm__ volatile(SFENCE:::"memory"); |
258 __asm__ volatile(EMMS:::"memory"); | |
18861 | 259 #endif |
260 mm_end = end - 3; | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
261 while (s < mm_end) |
18861 | 262 { |
26910 | 263 register unsigned x= *((const uint32_t *)s); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
264 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
265 d+=4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
266 s+=4; |
18861 | 267 } |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
268 if (s < end) |
18861 | 269 { |
26910 | 270 register unsigned short x= *((const uint16_t *)s); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
271 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); |
18861 | 272 } |
273 } | |
274 | |
25750 | 275 static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 276 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
277 register const uint8_t* s=src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
278 register uint8_t* d=dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
279 register const uint8_t *end; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
280 const uint8_t *mm_end; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
281 end = s + src_size; |
28276 | 282 #if HAVE_MMX |
27744 | 283 __asm__ volatile(PREFETCH" %0"::"m"(*s)); |
284 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); | |
285 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
286 mm_end = end - 15; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
287 while (s<mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
288 { |
27744 | 289 __asm__ volatile( |
29480 | 290 PREFETCH" 32%1 \n\t" |
291 "movq %1, %%mm0 \n\t" | |
292 "movq 8%1, %%mm2 \n\t" | |
293 "movq %%mm0, %%mm1 \n\t" | |
294 "movq %%mm2, %%mm3 \n\t" | |
295 "psrlq $1, %%mm0 \n\t" | |
296 "psrlq $1, %%mm2 \n\t" | |
297 "pand %%mm7, %%mm0 \n\t" | |
298 "pand %%mm7, %%mm2 \n\t" | |
299 "pand %%mm6, %%mm1 \n\t" | |
300 "pand %%mm6, %%mm3 \n\t" | |
301 "por %%mm1, %%mm0 \n\t" | |
302 "por %%mm3, %%mm2 \n\t" | |
303 MOVNTQ" %%mm0, %0 \n\t" | |
304 MOVNTQ" %%mm2, 8%0" | |
305 :"=m"(*d) | |
306 :"m"(*s) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
307 ); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
308 d+=16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
309 s+=16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
310 } |
27744 | 311 __asm__ volatile(SFENCE:::"memory"); |
312 __asm__ volatile(EMMS:::"memory"); | |
18861 | 313 #endif |
314 mm_end = end - 3; | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
315 while (s < mm_end) |
18861 | 316 { |
26925
3f6d2ca29727
restore needed cast to correct type with const
bcoudurier
parents:
26910
diff
changeset
|
317 register uint32_t x= *((const uint32_t*)s); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
318 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
319 s+=4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
320 d+=4; |
18861 | 321 } |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
322 if (s < end) |
18861 | 323 { |
26925
3f6d2ca29727
restore needed cast to correct type with const
bcoudurier
parents:
26910
diff
changeset
|
324 register uint16_t x= *((const uint16_t*)s); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
325 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); |
18861 | 326 } |
327 } | |
328 | |
329 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) | |
330 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
331 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
332 const uint8_t *end; |
28276 | 333 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
334 const uint8_t *mm_end; |
18861 | 335 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
336 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
337 end = s + src_size; |
28276 | 338 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
339 mm_end = end - 15; |
25109 | 340 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) |
27744 | 341 __asm__ volatile( |
29480 | 342 "movq %3, %%mm5 \n\t" |
343 "movq %4, %%mm6 \n\t" | |
344 "movq %5, %%mm7 \n\t" | |
345 "jmp 2f \n\t" | |
346 ASMALIGN(4) | |
347 "1: \n\t" | |
348 PREFETCH" 32(%1) \n\t" | |
349 "movd (%1), %%mm0 \n\t" | |
350 "movd 4(%1), %%mm3 \n\t" | |
351 "punpckldq 8(%1), %%mm0 \n\t" | |
352 "punpckldq 12(%1), %%mm3 \n\t" | |
353 "movq %%mm0, %%mm1 \n\t" | |
354 "movq %%mm3, %%mm4 \n\t" | |
355 "pand %%mm6, %%mm0 \n\t" | |
356 "pand %%mm6, %%mm3 \n\t" | |
357 "pmaddwd %%mm7, %%mm0 \n\t" | |
358 "pmaddwd %%mm7, %%mm3 \n\t" | |
359 "pand %%mm5, %%mm1 \n\t" | |
360 "pand %%mm5, %%mm4 \n\t" | |
361 "por %%mm1, %%mm0 \n\t" | |
362 "por %%mm4, %%mm3 \n\t" | |
363 "psrld $5, %%mm0 \n\t" | |
364 "pslld $11, %%mm3 \n\t" | |
365 "por %%mm3, %%mm0 \n\t" | |
366 MOVNTQ" %%mm0, (%0) \n\t" | |
367 "add $16, %1 \n\t" | |
368 "add $8, %0 \n\t" | |
369 "2: \n\t" | |
370 "cmp %2, %1 \n\t" | |
371 " jb 1b \n\t" | |
372 : "+r" (d), "+r"(s) | |
373 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
374 ); |
18861 | 375 #else |
27744 | 376 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
377 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
378 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
379 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
380 ::"m"(red_16mask),"m"(green_16mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
381 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
382 { |
27744 | 383 __asm__ volatile( |
29480 | 384 PREFETCH" 32%1 \n\t" |
385 "movd %1, %%mm0 \n\t" | |
386 "movd 4%1, %%mm3 \n\t" | |
387 "punpckldq 8%1, %%mm0 \n\t" | |
388 "punpckldq 12%1, %%mm3 \n\t" | |
389 "movq %%mm0, %%mm1 \n\t" | |
390 "movq %%mm0, %%mm2 \n\t" | |
391 "movq %%mm3, %%mm4 \n\t" | |
392 "movq %%mm3, %%mm5 \n\t" | |
393 "psrlq $3, %%mm0 \n\t" | |
394 "psrlq $3, %%mm3 \n\t" | |
395 "pand %2, %%mm0 \n\t" | |
396 "pand %2, %%mm3 \n\t" | |
397 "psrlq $5, %%mm1 \n\t" | |
398 "psrlq $5, %%mm4 \n\t" | |
399 "pand %%mm6, %%mm1 \n\t" | |
400 "pand %%mm6, %%mm4 \n\t" | |
401 "psrlq $8, %%mm2 \n\t" | |
402 "psrlq $8, %%mm5 \n\t" | |
403 "pand %%mm7, %%mm2 \n\t" | |
404 "pand %%mm7, %%mm5 \n\t" | |
405 "por %%mm1, %%mm0 \n\t" | |
406 "por %%mm4, %%mm3 \n\t" | |
407 "por %%mm2, %%mm0 \n\t" | |
408 "por %%mm5, %%mm3 \n\t" | |
409 "psllq $16, %%mm3 \n\t" | |
410 "por %%mm3, %%mm0 \n\t" | |
411 MOVNTQ" %%mm0, %0 \n\t" | |
412 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
413 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
414 s += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
415 } |
18861 | 416 #endif |
27744 | 417 __asm__ volatile(SFENCE:::"memory"); |
418 __asm__ volatile(EMMS:::"memory"); | |
18861 | 419 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
420 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
421 { |
26910 | 422 register int rgb = *(const uint32_t*)s; s += 4; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
423 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
424 } |
18861 | 425 } |
426 | |
427 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) | |
428 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
429 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
430 const uint8_t *end; |
28276 | 431 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
432 const uint8_t *mm_end; |
18861 | 433 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
434 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
435 end = s + src_size; |
28276 | 436 #if HAVE_MMX |
27744 | 437 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
438 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
439 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
440 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
441 ::"m"(red_16mask),"m"(green_16mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
442 mm_end = end - 15; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
443 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
444 { |
27744 | 445 __asm__ volatile( |
29480 | 446 PREFETCH" 32%1 \n\t" |
447 "movd %1, %%mm0 \n\t" | |
448 "movd 4%1, %%mm3 \n\t" | |
449 "punpckldq 8%1, %%mm0 \n\t" | |
450 "punpckldq 12%1, %%mm3 \n\t" | |
451 "movq %%mm0, %%mm1 \n\t" | |
452 "movq %%mm0, %%mm2 \n\t" | |
453 "movq %%mm3, %%mm4 \n\t" | |
454 "movq %%mm3, %%mm5 \n\t" | |
455 "psllq $8, %%mm0 \n\t" | |
456 "psllq $8, %%mm3 \n\t" | |
457 "pand %%mm7, %%mm0 \n\t" | |
458 "pand %%mm7, %%mm3 \n\t" | |
459 "psrlq $5, %%mm1 \n\t" | |
460 "psrlq $5, %%mm4 \n\t" | |
461 "pand %%mm6, %%mm1 \n\t" | |
462 "pand %%mm6, %%mm4 \n\t" | |
463 "psrlq $19, %%mm2 \n\t" | |
464 "psrlq $19, %%mm5 \n\t" | |
465 "pand %2, %%mm2 \n\t" | |
466 "pand %2, %%mm5 \n\t" | |
467 "por %%mm1, %%mm0 \n\t" | |
468 "por %%mm4, %%mm3 \n\t" | |
469 "por %%mm2, %%mm0 \n\t" | |
470 "por %%mm5, %%mm3 \n\t" | |
471 "psllq $16, %%mm3 \n\t" | |
472 "por %%mm3, %%mm0 \n\t" | |
473 MOVNTQ" %%mm0, %0 \n\t" | |
474 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
475 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
476 s += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
477 } |
27744 | 478 __asm__ volatile(SFENCE:::"memory"); |
479 __asm__ volatile(EMMS:::"memory"); | |
18861 | 480 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
481 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
482 { |
26910 | 483 register int rgb = *(const uint32_t*)s; s += 4; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
484 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
485 } |
18861 | 486 } |
487 | |
488 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) | |
489 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
490 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
491 const uint8_t *end; |
28276 | 492 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
493 const uint8_t *mm_end; |
18861 | 494 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
495 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
496 end = s + src_size; |
28276 | 497 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
498 mm_end = end - 15; |
25109 | 499 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) |
27744 | 500 __asm__ volatile( |
29480 | 501 "movq %3, %%mm5 \n\t" |
502 "movq %4, %%mm6 \n\t" | |
503 "movq %5, %%mm7 \n\t" | |
504 "jmp 2f \n\t" | |
505 ASMALIGN(4) | |
506 "1: \n\t" | |
507 PREFETCH" 32(%1) \n\t" | |
508 "movd (%1), %%mm0 \n\t" | |
509 "movd 4(%1), %%mm3 \n\t" | |
510 "punpckldq 8(%1), %%mm0 \n\t" | |
511 "punpckldq 12(%1), %%mm3 \n\t" | |
512 "movq %%mm0, %%mm1 \n\t" | |
513 "movq %%mm3, %%mm4 \n\t" | |
514 "pand %%mm6, %%mm0 \n\t" | |
515 "pand %%mm6, %%mm3 \n\t" | |
516 "pmaddwd %%mm7, %%mm0 \n\t" | |
517 "pmaddwd %%mm7, %%mm3 \n\t" | |
518 "pand %%mm5, %%mm1 \n\t" | |
519 "pand %%mm5, %%mm4 \n\t" | |
520 "por %%mm1, %%mm0 \n\t" | |
521 "por %%mm4, %%mm3 \n\t" | |
522 "psrld $6, %%mm0 \n\t" | |
523 "pslld $10, %%mm3 \n\t" | |
524 "por %%mm3, %%mm0 \n\t" | |
525 MOVNTQ" %%mm0, (%0) \n\t" | |
526 "add $16, %1 \n\t" | |
527 "add $8, %0 \n\t" | |
528 "2: \n\t" | |
529 "cmp %2, %1 \n\t" | |
530 " jb 1b \n\t" | |
531 : "+r" (d), "+r"(s) | |
532 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
533 ); |
18861 | 534 #else |
27744 | 535 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
536 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
537 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
538 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
539 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
540 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
541 { |
27744 | 542 __asm__ volatile( |
29480 | 543 PREFETCH" 32%1 \n\t" |
544 "movd %1, %%mm0 \n\t" | |
545 "movd 4%1, %%mm3 \n\t" | |
546 "punpckldq 8%1, %%mm0 \n\t" | |
547 "punpckldq 12%1, %%mm3 \n\t" | |
548 "movq %%mm0, %%mm1 \n\t" | |
549 "movq %%mm0, %%mm2 \n\t" | |
550 "movq %%mm3, %%mm4 \n\t" | |
551 "movq %%mm3, %%mm5 \n\t" | |
552 "psrlq $3, %%mm0 \n\t" | |
553 "psrlq $3, %%mm3 \n\t" | |
554 "pand %2, %%mm0 \n\t" | |
555 "pand %2, %%mm3 \n\t" | |
556 "psrlq $6, %%mm1 \n\t" | |
557 "psrlq $6, %%mm4 \n\t" | |
558 "pand %%mm6, %%mm1 \n\t" | |
559 "pand %%mm6, %%mm4 \n\t" | |
560 "psrlq $9, %%mm2 \n\t" | |
561 "psrlq $9, %%mm5 \n\t" | |
562 "pand %%mm7, %%mm2 \n\t" | |
563 "pand %%mm7, %%mm5 \n\t" | |
564 "por %%mm1, %%mm0 \n\t" | |
565 "por %%mm4, %%mm3 \n\t" | |
566 "por %%mm2, %%mm0 \n\t" | |
567 "por %%mm5, %%mm3 \n\t" | |
568 "psllq $16, %%mm3 \n\t" | |
569 "por %%mm3, %%mm0 \n\t" | |
570 MOVNTQ" %%mm0, %0 \n\t" | |
571 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
572 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
573 s += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
574 } |
18861 | 575 #endif |
27744 | 576 __asm__ volatile(SFENCE:::"memory"); |
577 __asm__ volatile(EMMS:::"memory"); | |
18861 | 578 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
579 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
580 { |
26910 | 581 register int rgb = *(const uint32_t*)s; s += 4; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
582 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
583 } |
18861 | 584 } |
585 | |
586 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) | |
587 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
588 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
589 const uint8_t *end; |
28276 | 590 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
591 const uint8_t *mm_end; |
18861 | 592 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
593 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
594 end = s + src_size; |
28276 | 595 #if HAVE_MMX |
27744 | 596 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
597 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
598 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
599 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
600 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
601 mm_end = end - 15; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
602 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
603 { |
27744 | 604 __asm__ volatile( |
29480 | 605 PREFETCH" 32%1 \n\t" |
606 "movd %1, %%mm0 \n\t" | |
607 "movd 4%1, %%mm3 \n\t" | |
608 "punpckldq 8%1, %%mm0 \n\t" | |
609 "punpckldq 12%1, %%mm3 \n\t" | |
610 "movq %%mm0, %%mm1 \n\t" | |
611 "movq %%mm0, %%mm2 \n\t" | |
612 "movq %%mm3, %%mm4 \n\t" | |
613 "movq %%mm3, %%mm5 \n\t" | |
614 "psllq $7, %%mm0 \n\t" | |
615 "psllq $7, %%mm3 \n\t" | |
616 "pand %%mm7, %%mm0 \n\t" | |
617 "pand %%mm7, %%mm3 \n\t" | |
618 "psrlq $6, %%mm1 \n\t" | |
619 "psrlq $6, %%mm4 \n\t" | |
620 "pand %%mm6, %%mm1 \n\t" | |
621 "pand %%mm6, %%mm4 \n\t" | |
622 "psrlq $19, %%mm2 \n\t" | |
623 "psrlq $19, %%mm5 \n\t" | |
624 "pand %2, %%mm2 \n\t" | |
625 "pand %2, %%mm5 \n\t" | |
626 "por %%mm1, %%mm0 \n\t" | |
627 "por %%mm4, %%mm3 \n\t" | |
628 "por %%mm2, %%mm0 \n\t" | |
629 "por %%mm5, %%mm3 \n\t" | |
630 "psllq $16, %%mm3 \n\t" | |
631 "por %%mm3, %%mm0 \n\t" | |
632 MOVNTQ" %%mm0, %0 \n\t" | |
633 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
634 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
635 s += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
636 } |
27744 | 637 __asm__ volatile(SFENCE:::"memory"); |
638 __asm__ volatile(EMMS:::"memory"); | |
18861 | 639 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
640 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
641 { |
26910 | 642 register int rgb = *(const uint32_t*)s; s += 4; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
643 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
644 } |
18861 | 645 } |
646 | |
27486 | 647 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 648 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
649 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
650 const uint8_t *end; |
28276 | 651 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
652 const uint8_t *mm_end; |
18861 | 653 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
654 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
655 end = s + src_size; |
28276 | 656 #if HAVE_MMX |
27744 | 657 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
658 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
659 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
660 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
661 ::"m"(red_16mask),"m"(green_16mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
662 mm_end = end - 11; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
663 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
664 { |
27744 | 665 __asm__ volatile( |
29480 | 666 PREFETCH" 32%1 \n\t" |
667 "movd %1, %%mm0 \n\t" | |
668 "movd 3%1, %%mm3 \n\t" | |
669 "punpckldq 6%1, %%mm0 \n\t" | |
670 "punpckldq 9%1, %%mm3 \n\t" | |
671 "movq %%mm0, %%mm1 \n\t" | |
672 "movq %%mm0, %%mm2 \n\t" | |
673 "movq %%mm3, %%mm4 \n\t" | |
674 "movq %%mm3, %%mm5 \n\t" | |
675 "psrlq $3, %%mm0 \n\t" | |
676 "psrlq $3, %%mm3 \n\t" | |
677 "pand %2, %%mm0 \n\t" | |
678 "pand %2, %%mm3 \n\t" | |
679 "psrlq $5, %%mm1 \n\t" | |
680 "psrlq $5, %%mm4 \n\t" | |
681 "pand %%mm6, %%mm1 \n\t" | |
682 "pand %%mm6, %%mm4 \n\t" | |
683 "psrlq $8, %%mm2 \n\t" | |
684 "psrlq $8, %%mm5 \n\t" | |
685 "pand %%mm7, %%mm2 \n\t" | |
686 "pand %%mm7, %%mm5 \n\t" | |
687 "por %%mm1, %%mm0 \n\t" | |
688 "por %%mm4, %%mm3 \n\t" | |
689 "por %%mm2, %%mm0 \n\t" | |
690 "por %%mm5, %%mm3 \n\t" | |
691 "psllq $16, %%mm3 \n\t" | |
692 "por %%mm3, %%mm0 \n\t" | |
693 MOVNTQ" %%mm0, %0 \n\t" | |
694 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
695 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
696 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
697 } |
27744 | 698 __asm__ volatile(SFENCE:::"memory"); |
699 __asm__ volatile(EMMS:::"memory"); | |
18861 | 700 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
701 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
702 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
703 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
704 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
705 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
706 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
707 } |
18861 | 708 } |
709 | |
27486 | 710 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 711 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
712 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
713 const uint8_t *end; |
28276 | 714 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
715 const uint8_t *mm_end; |
18861 | 716 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
717 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
718 end = s + src_size; |
28276 | 719 #if HAVE_MMX |
27744 | 720 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
721 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
722 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
723 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
724 ::"m"(red_16mask),"m"(green_16mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
725 mm_end = end - 15; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
726 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
727 { |
27744 | 728 __asm__ volatile( |
29480 | 729 PREFETCH" 32%1 \n\t" |
730 "movd %1, %%mm0 \n\t" | |
731 "movd 3%1, %%mm3 \n\t" | |
732 "punpckldq 6%1, %%mm0 \n\t" | |
733 "punpckldq 9%1, %%mm3 \n\t" | |
734 "movq %%mm0, %%mm1 \n\t" | |
735 "movq %%mm0, %%mm2 \n\t" | |
736 "movq %%mm3, %%mm4 \n\t" | |
737 "movq %%mm3, %%mm5 \n\t" | |
738 "psllq $8, %%mm0 \n\t" | |
739 "psllq $8, %%mm3 \n\t" | |
740 "pand %%mm7, %%mm0 \n\t" | |
741 "pand %%mm7, %%mm3 \n\t" | |
742 "psrlq $5, %%mm1 \n\t" | |
743 "psrlq $5, %%mm4 \n\t" | |
744 "pand %%mm6, %%mm1 \n\t" | |
745 "pand %%mm6, %%mm4 \n\t" | |
746 "psrlq $19, %%mm2 \n\t" | |
747 "psrlq $19, %%mm5 \n\t" | |
748 "pand %2, %%mm2 \n\t" | |
749 "pand %2, %%mm5 \n\t" | |
750 "por %%mm1, %%mm0 \n\t" | |
751 "por %%mm4, %%mm3 \n\t" | |
752 "por %%mm2, %%mm0 \n\t" | |
753 "por %%mm5, %%mm3 \n\t" | |
754 "psllq $16, %%mm3 \n\t" | |
755 "por %%mm3, %%mm0 \n\t" | |
756 MOVNTQ" %%mm0, %0 \n\t" | |
757 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
758 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
759 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
760 } |
27744 | 761 __asm__ volatile(SFENCE:::"memory"); |
762 __asm__ volatile(EMMS:::"memory"); | |
18861 | 763 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
764 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
765 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
766 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
767 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
768 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
769 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
770 } |
18861 | 771 } |
772 | |
27486 | 773 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 774 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
775 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
776 const uint8_t *end; |
28276 | 777 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
778 const uint8_t *mm_end; |
18861 | 779 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
780 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
781 end = s + src_size; |
28276 | 782 #if HAVE_MMX |
27744 | 783 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
784 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
785 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
786 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
787 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
788 mm_end = end - 11; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
789 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
790 { |
27744 | 791 __asm__ volatile( |
29480 | 792 PREFETCH" 32%1 \n\t" |
793 "movd %1, %%mm0 \n\t" | |
794 "movd 3%1, %%mm3 \n\t" | |
795 "punpckldq 6%1, %%mm0 \n\t" | |
796 "punpckldq 9%1, %%mm3 \n\t" | |
797 "movq %%mm0, %%mm1 \n\t" | |
798 "movq %%mm0, %%mm2 \n\t" | |
799 "movq %%mm3, %%mm4 \n\t" | |
800 "movq %%mm3, %%mm5 \n\t" | |
801 "psrlq $3, %%mm0 \n\t" | |
802 "psrlq $3, %%mm3 \n\t" | |
803 "pand %2, %%mm0 \n\t" | |
804 "pand %2, %%mm3 \n\t" | |
805 "psrlq $6, %%mm1 \n\t" | |
806 "psrlq $6, %%mm4 \n\t" | |
807 "pand %%mm6, %%mm1 \n\t" | |
808 "pand %%mm6, %%mm4 \n\t" | |
809 "psrlq $9, %%mm2 \n\t" | |
810 "psrlq $9, %%mm5 \n\t" | |
811 "pand %%mm7, %%mm2 \n\t" | |
812 "pand %%mm7, %%mm5 \n\t" | |
813 "por %%mm1, %%mm0 \n\t" | |
814 "por %%mm4, %%mm3 \n\t" | |
815 "por %%mm2, %%mm0 \n\t" | |
816 "por %%mm5, %%mm3 \n\t" | |
817 "psllq $16, %%mm3 \n\t" | |
818 "por %%mm3, %%mm0 \n\t" | |
819 MOVNTQ" %%mm0, %0 \n\t" | |
820 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
821 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
822 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
823 } |
27744 | 824 __asm__ volatile(SFENCE:::"memory"); |
825 __asm__ volatile(EMMS:::"memory"); | |
18861 | 826 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
827 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
828 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
829 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
830 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
831 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
832 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
833 } |
18861 | 834 } |
835 | |
27486 | 836 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 837 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
838 const uint8_t *s = src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
839 const uint8_t *end; |
28276 | 840 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
841 const uint8_t *mm_end; |
18861 | 842 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
843 uint16_t *d = (uint16_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
844 end = s + src_size; |
28276 | 845 #if HAVE_MMX |
27744 | 846 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
847 __asm__ volatile( | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
848 "movq %0, %%mm7 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
849 "movq %1, %%mm6 \n\t" |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
850 ::"m"(red_15mask),"m"(green_15mask)); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
851 mm_end = end - 15; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
852 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
853 { |
27744 | 854 __asm__ volatile( |
29480 | 855 PREFETCH" 32%1 \n\t" |
856 "movd %1, %%mm0 \n\t" | |
857 "movd 3%1, %%mm3 \n\t" | |
858 "punpckldq 6%1, %%mm0 \n\t" | |
859 "punpckldq 9%1, %%mm3 \n\t" | |
860 "movq %%mm0, %%mm1 \n\t" | |
861 "movq %%mm0, %%mm2 \n\t" | |
862 "movq %%mm3, %%mm4 \n\t" | |
863 "movq %%mm3, %%mm5 \n\t" | |
864 "psllq $7, %%mm0 \n\t" | |
865 "psllq $7, %%mm3 \n\t" | |
866 "pand %%mm7, %%mm0 \n\t" | |
867 "pand %%mm7, %%mm3 \n\t" | |
868 "psrlq $6, %%mm1 \n\t" | |
869 "psrlq $6, %%mm4 \n\t" | |
870 "pand %%mm6, %%mm1 \n\t" | |
871 "pand %%mm6, %%mm4 \n\t" | |
872 "psrlq $19, %%mm2 \n\t" | |
873 "psrlq $19, %%mm5 \n\t" | |
874 "pand %2, %%mm2 \n\t" | |
875 "pand %2, %%mm5 \n\t" | |
876 "por %%mm1, %%mm0 \n\t" | |
877 "por %%mm4, %%mm3 \n\t" | |
878 "por %%mm2, %%mm0 \n\t" | |
879 "por %%mm5, %%mm3 \n\t" | |
880 "psllq $16, %%mm3 \n\t" | |
881 "por %%mm3, %%mm0 \n\t" | |
882 MOVNTQ" %%mm0, %0 \n\t" | |
883 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
884 d += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
885 s += 12; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
886 } |
27744 | 887 __asm__ volatile(SFENCE:::"memory"); |
888 __asm__ volatile(EMMS:::"memory"); | |
18861 | 889 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
890 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
891 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
892 const int r = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
893 const int g = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
894 const int b = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
895 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
896 } |
18861 | 897 } |
898 | |
/*
  This code uses a less accurate approximation: the input value is simply
  left-shifted and the low-order bits are filled with zeroes. This method
  improves PNG compression, but it cannot reproduce white exactly, since it
  does not generate an all-ones maximum value; the net effect is to darken
  the image slightly.

  The better method would be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      leftmost bits repeated to fill open bits
       |
   original bits
*/
27486 | 920 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 921 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
922 const uint16_t *end; |
28276 | 923 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
924 const uint16_t *mm_end; |
18861 | 925 #endif |
26909 | 926 uint8_t *d = dst; |
26910 | 927 const uint16_t *s = (const uint16_t*)src; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
928 end = s + src_size/2; |
28276 | 929 #if HAVE_MMX |
27744 | 930 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
931 mm_end = end - 7; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
932 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
933 { |
27744 | 934 __asm__ volatile( |
29480 | 935 PREFETCH" 32%1 \n\t" |
936 "movq %1, %%mm0 \n\t" | |
937 "movq %1, %%mm1 \n\t" | |
938 "movq %1, %%mm2 \n\t" | |
939 "pand %2, %%mm0 \n\t" | |
940 "pand %3, %%mm1 \n\t" | |
941 "pand %4, %%mm2 \n\t" | |
942 "psllq $3, %%mm0 \n\t" | |
943 "psrlq $2, %%mm1 \n\t" | |
944 "psrlq $7, %%mm2 \n\t" | |
945 "movq %%mm0, %%mm3 \n\t" | |
946 "movq %%mm1, %%mm4 \n\t" | |
947 "movq %%mm2, %%mm5 \n\t" | |
948 "punpcklwd %5, %%mm0 \n\t" | |
949 "punpcklwd %5, %%mm1 \n\t" | |
950 "punpcklwd %5, %%mm2 \n\t" | |
951 "punpckhwd %5, %%mm3 \n\t" | |
952 "punpckhwd %5, %%mm4 \n\t" | |
953 "punpckhwd %5, %%mm5 \n\t" | |
954 "psllq $8, %%mm1 \n\t" | |
955 "psllq $16, %%mm2 \n\t" | |
956 "por %%mm1, %%mm0 \n\t" | |
957 "por %%mm2, %%mm0 \n\t" | |
958 "psllq $8, %%mm4 \n\t" | |
959 "psllq $16, %%mm5 \n\t" | |
960 "por %%mm4, %%mm3 \n\t" | |
961 "por %%mm5, %%mm3 \n\t" | |
18861 | 962 |
29480 | 963 "movq %%mm0, %%mm6 \n\t" |
964 "movq %%mm3, %%mm7 \n\t" | |
23129 | 965 |
29480 | 966 "movq 8%1, %%mm0 \n\t" |
967 "movq 8%1, %%mm1 \n\t" | |
968 "movq 8%1, %%mm2 \n\t" | |
969 "pand %2, %%mm0 \n\t" | |
970 "pand %3, %%mm1 \n\t" | |
971 "pand %4, %%mm2 \n\t" | |
972 "psllq $3, %%mm0 \n\t" | |
973 "psrlq $2, %%mm1 \n\t" | |
974 "psrlq $7, %%mm2 \n\t" | |
975 "movq %%mm0, %%mm3 \n\t" | |
976 "movq %%mm1, %%mm4 \n\t" | |
977 "movq %%mm2, %%mm5 \n\t" | |
978 "punpcklwd %5, %%mm0 \n\t" | |
979 "punpcklwd %5, %%mm1 \n\t" | |
980 "punpcklwd %5, %%mm2 \n\t" | |
981 "punpckhwd %5, %%mm3 \n\t" | |
982 "punpckhwd %5, %%mm4 \n\t" | |
983 "punpckhwd %5, %%mm5 \n\t" | |
984 "psllq $8, %%mm1 \n\t" | |
985 "psllq $16, %%mm2 \n\t" | |
986 "por %%mm1, %%mm0 \n\t" | |
987 "por %%mm2, %%mm0 \n\t" | |
988 "psllq $8, %%mm4 \n\t" | |
989 "psllq $16, %%mm5 \n\t" | |
990 "por %%mm4, %%mm3 \n\t" | |
991 "por %%mm5, %%mm3 \n\t" | |
18861 | 992 |
29480 | 993 :"=m"(*d) |
994 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |
995 :"memory"); | |
27158 | 996 /* borrowed 32 to 24 */ |
27744 | 997 __asm__ volatile( |
29480 | 998 "movq %%mm0, %%mm4 \n\t" |
999 "movq %%mm3, %%mm5 \n\t" | |
1000 "movq %%mm6, %%mm0 \n\t" | |
1001 "movq %%mm7, %%mm1 \n\t" | |
23129 | 1002 |
29480 | 1003 "movq %%mm4, %%mm6 \n\t" |
1004 "movq %%mm5, %%mm7 \n\t" | |
1005 "movq %%mm0, %%mm2 \n\t" | |
1006 "movq %%mm1, %%mm3 \n\t" | |
18861 | 1007 |
29480 | 1008 "psrlq $8, %%mm2 \n\t" |
1009 "psrlq $8, %%mm3 \n\t" | |
1010 "psrlq $8, %%mm6 \n\t" | |
1011 "psrlq $8, %%mm7 \n\t" | |
1012 "pand %2, %%mm0 \n\t" | |
1013 "pand %2, %%mm1 \n\t" | |
1014 "pand %2, %%mm4 \n\t" | |
1015 "pand %2, %%mm5 \n\t" | |
1016 "pand %3, %%mm2 \n\t" | |
1017 "pand %3, %%mm3 \n\t" | |
1018 "pand %3, %%mm6 \n\t" | |
1019 "pand %3, %%mm7 \n\t" | |
1020 "por %%mm2, %%mm0 \n\t" | |
1021 "por %%mm3, %%mm1 \n\t" | |
1022 "por %%mm6, %%mm4 \n\t" | |
1023 "por %%mm7, %%mm5 \n\t" | |
18861 | 1024 |
29480 | 1025 "movq %%mm1, %%mm2 \n\t" |
1026 "movq %%mm4, %%mm3 \n\t" | |
1027 "psllq $48, %%mm2 \n\t" | |
1028 "psllq $32, %%mm3 \n\t" | |
1029 "pand %4, %%mm2 \n\t" | |
1030 "pand %5, %%mm3 \n\t" | |
1031 "por %%mm2, %%mm0 \n\t" | |
1032 "psrlq $16, %%mm1 \n\t" | |
1033 "psrlq $32, %%mm4 \n\t" | |
1034 "psllq $16, %%mm5 \n\t" | |
1035 "por %%mm3, %%mm1 \n\t" | |
1036 "pand %6, %%mm5 \n\t" | |
1037 "por %%mm5, %%mm4 \n\t" | |
18861 | 1038 |
29480 | 1039 MOVNTQ" %%mm0, %0 \n\t" |
1040 MOVNTQ" %%mm1, 8%0 \n\t" | |
1041 MOVNTQ" %%mm4, 16%0" | |
18861 | 1042 |
29480 | 1043 :"=m"(*d) |
1044 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1045 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1046 d += 24; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1047 s += 8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1048 } |
27744 | 1049 __asm__ volatile(SFENCE:::"memory"); |
1050 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1051 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1052 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1053 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1054 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1055 bgr = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1056 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1057 *d++ = (bgr&0x3E0)>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1058 *d++ = (bgr&0x7C00)>>7; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1059 } |
18861 | 1060 } |
1061 | |
27486 | 1062 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
18861 | 1063 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1064 const uint16_t *end; |
28276 | 1065 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1066 const uint16_t *mm_end; |
18861 | 1067 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1068 uint8_t *d = (uint8_t *)dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1069 const uint16_t *s = (const uint16_t *)src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1070 end = s + src_size/2; |
28276 | 1071 #if HAVE_MMX |
27744 | 1072 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1073 mm_end = end - 7; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1074 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1075 { |
27744 | 1076 __asm__ volatile( |
29480 | 1077 PREFETCH" 32%1 \n\t" |
1078 "movq %1, %%mm0 \n\t" | |
1079 "movq %1, %%mm1 \n\t" | |
1080 "movq %1, %%mm2 \n\t" | |
1081 "pand %2, %%mm0 \n\t" | |
1082 "pand %3, %%mm1 \n\t" | |
1083 "pand %4, %%mm2 \n\t" | |
1084 "psllq $3, %%mm0 \n\t" | |
1085 "psrlq $3, %%mm1 \n\t" | |
1086 "psrlq $8, %%mm2 \n\t" | |
1087 "movq %%mm0, %%mm3 \n\t" | |
1088 "movq %%mm1, %%mm4 \n\t" | |
1089 "movq %%mm2, %%mm5 \n\t" | |
1090 "punpcklwd %5, %%mm0 \n\t" | |
1091 "punpcklwd %5, %%mm1 \n\t" | |
1092 "punpcklwd %5, %%mm2 \n\t" | |
1093 "punpckhwd %5, %%mm3 \n\t" | |
1094 "punpckhwd %5, %%mm4 \n\t" | |
1095 "punpckhwd %5, %%mm5 \n\t" | |
1096 "psllq $8, %%mm1 \n\t" | |
1097 "psllq $16, %%mm2 \n\t" | |
1098 "por %%mm1, %%mm0 \n\t" | |
1099 "por %%mm2, %%mm0 \n\t" | |
1100 "psllq $8, %%mm4 \n\t" | |
1101 "psllq $16, %%mm5 \n\t" | |
1102 "por %%mm4, %%mm3 \n\t" | |
1103 "por %%mm5, %%mm3 \n\t" | |
23129 | 1104 |
29480 | 1105 "movq %%mm0, %%mm6 \n\t" |
1106 "movq %%mm3, %%mm7 \n\t" | |
18861 | 1107 |
29480 | 1108 "movq 8%1, %%mm0 \n\t" |
1109 "movq 8%1, %%mm1 \n\t" | |
1110 "movq 8%1, %%mm2 \n\t" | |
1111 "pand %2, %%mm0 \n\t" | |
1112 "pand %3, %%mm1 \n\t" | |
1113 "pand %4, %%mm2 \n\t" | |
1114 "psllq $3, %%mm0 \n\t" | |
1115 "psrlq $3, %%mm1 \n\t" | |
1116 "psrlq $8, %%mm2 \n\t" | |
1117 "movq %%mm0, %%mm3 \n\t" | |
1118 "movq %%mm1, %%mm4 \n\t" | |
1119 "movq %%mm2, %%mm5 \n\t" | |
1120 "punpcklwd %5, %%mm0 \n\t" | |
1121 "punpcklwd %5, %%mm1 \n\t" | |
1122 "punpcklwd %5, %%mm2 \n\t" | |
1123 "punpckhwd %5, %%mm3 \n\t" | |
1124 "punpckhwd %5, %%mm4 \n\t" | |
1125 "punpckhwd %5, %%mm5 \n\t" | |
1126 "psllq $8, %%mm1 \n\t" | |
1127 "psllq $16, %%mm2 \n\t" | |
1128 "por %%mm1, %%mm0 \n\t" | |
1129 "por %%mm2, %%mm0 \n\t" | |
1130 "psllq $8, %%mm4 \n\t" | |
1131 "psllq $16, %%mm5 \n\t" | |
1132 "por %%mm4, %%mm3 \n\t" | |
1133 "por %%mm5, %%mm3 \n\t" | |
1134 :"=m"(*d) | |
1135 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |
1136 :"memory"); | |
27158 | 1137 /* borrowed 32 to 24 */ |
27744 | 1138 __asm__ volatile( |
29480 | 1139 "movq %%mm0, %%mm4 \n\t" |
1140 "movq %%mm3, %%mm5 \n\t" | |
1141 "movq %%mm6, %%mm0 \n\t" | |
1142 "movq %%mm7, %%mm1 \n\t" | |
23129 | 1143 |
29480 | 1144 "movq %%mm4, %%mm6 \n\t" |
1145 "movq %%mm5, %%mm7 \n\t" | |
1146 "movq %%mm0, %%mm2 \n\t" | |
1147 "movq %%mm1, %%mm3 \n\t" | |
18861 | 1148 |
29480 | 1149 "psrlq $8, %%mm2 \n\t" |
1150 "psrlq $8, %%mm3 \n\t" | |
1151 "psrlq $8, %%mm6 \n\t" | |
1152 "psrlq $8, %%mm7 \n\t" | |
1153 "pand %2, %%mm0 \n\t" | |
1154 "pand %2, %%mm1 \n\t" | |
1155 "pand %2, %%mm4 \n\t" | |
1156 "pand %2, %%mm5 \n\t" | |
1157 "pand %3, %%mm2 \n\t" | |
1158 "pand %3, %%mm3 \n\t" | |
1159 "pand %3, %%mm6 \n\t" | |
1160 "pand %3, %%mm7 \n\t" | |
1161 "por %%mm2, %%mm0 \n\t" | |
1162 "por %%mm3, %%mm1 \n\t" | |
1163 "por %%mm6, %%mm4 \n\t" | |
1164 "por %%mm7, %%mm5 \n\t" | |
18861 | 1165 |
29480 | 1166 "movq %%mm1, %%mm2 \n\t" |
1167 "movq %%mm4, %%mm3 \n\t" | |
1168 "psllq $48, %%mm2 \n\t" | |
1169 "psllq $32, %%mm3 \n\t" | |
1170 "pand %4, %%mm2 \n\t" | |
1171 "pand %5, %%mm3 \n\t" | |
1172 "por %%mm2, %%mm0 \n\t" | |
1173 "psrlq $16, %%mm1 \n\t" | |
1174 "psrlq $32, %%mm4 \n\t" | |
1175 "psllq $16, %%mm5 \n\t" | |
1176 "por %%mm3, %%mm1 \n\t" | |
1177 "pand %6, %%mm5 \n\t" | |
1178 "por %%mm5, %%mm4 \n\t" | |
18861 | 1179 |
29480 | 1180 MOVNTQ" %%mm0, %0 \n\t" |
1181 MOVNTQ" %%mm1, 8%0 \n\t" | |
1182 MOVNTQ" %%mm4, 16%0" | |
18861 | 1183 |
29480 | 1184 :"=m"(*d) |
1185 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1186 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1187 d += 24; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1188 s += 8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1189 } |
27744 | 1190 __asm__ volatile(SFENCE:::"memory"); |
1191 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1192 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1193 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1194 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1195 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1196 bgr = *s++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1197 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1198 *d++ = (bgr&0x7E0)>>3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1199 *d++ = (bgr&0xF800)>>8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1200 } |
18861 | 1201 } |
1202 | |
28773
8a0785c19f48
Rewrite of rgb15to32 and rgb16to32 using fewer asm instructions
sdrik
parents:
28721
diff
changeset
|
/*
 * PACK_RGB32: asm fragment that interleaves four pixels, held as three
 * per-channel vectors, into two quadwords of 32-bit pixels and stores them
 * at operand %0 and 8 bytes past it.
 *
 * Register contents expected on entry (most significant byte first):
 *   mm0 = 00 B3 00 B2 00 B1 00 B0
 *   mm1 = 00 G3 00 G2 00 G1 00 G0
 *   mm2 = 00 R3 00 R2 00 R1 00 R0
 *   mm6 = FF FF FF FF FF FF FF FF
 *   mm7 = 00 00 00 00 00 00 00 00
 *
 * mm0-mm3 are overwritten; mm6 and mm7 are only read.
 */
#define PACK_RGB32 \
    "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
    "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
    "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
    "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
    "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
    "movq %%mm0, %%mm3 \n\t" \
    "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
    "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
    MOVNTQ" %%mm0, %0 \n\t" \
    MOVNTQ" %%mm3, 8%0 \n\t"

18861 | 1222 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) |
1223 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1224 const uint16_t *end; |
28276 | 1225 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1226 const uint16_t *mm_end; |
18861 | 1227 #endif |
26909 | 1228 uint8_t *d = dst; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1229 const uint16_t *s = (const uint16_t *)src; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1230 end = s + src_size/2; |
28276 | 1231 #if HAVE_MMX |
27744 | 1232 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1233 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); | |
28773
8a0785c19f48
Rewrite of rgb15to32 and rgb16to32 using fewer asm instructions
sdrik
parents:
28721
diff
changeset
|
1234 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1235 mm_end = end - 3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1236 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1237 { |
27744 | 1238 __asm__ volatile( |
29480 | 1239 PREFETCH" 32%1 \n\t" |
1240 "movq %1, %%mm0 \n\t" | |
1241 "movq %1, %%mm1 \n\t" | |
1242 "movq %1, %%mm2 \n\t" | |
1243 "pand %2, %%mm0 \n\t" | |
1244 "pand %3, %%mm1 \n\t" | |
1245 "pand %4, %%mm2 \n\t" | |
1246 "psllq $3, %%mm0 \n\t" | |
1247 "psrlq $2, %%mm1 \n\t" | |
1248 "psrlq $7, %%mm2 \n\t" | |
1249 PACK_RGB32 | |
1250 :"=m"(*d) | |
1251 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |
1252 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1253 d += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1254 s += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1255 } |
27744 | 1256 __asm__ volatile(SFENCE:::"memory"); |
1257 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1258 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1259 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1260 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1261 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1262 bgr = *s++; |
29397 | 1263 #if HAVE_BIGENDIAN |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1264 *d++ = 255; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1265 *d++ = (bgr&0x7C00)>>7; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1266 *d++ = (bgr&0x3E0)>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1267 *d++ = (bgr&0x1F)<<3; |
18861 | 1268 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1269 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1270 *d++ = (bgr&0x3E0)>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1271 *d++ = (bgr&0x7C00)>>7; |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1272 *d++ = 255; |
18861 | 1273 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1274 } |
18861 | 1275 } |
1276 | |
1277 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1278 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1279 const uint16_t *end; |
28276 | 1280 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1281 const uint16_t *mm_end; |
18861 | 1282 #endif |
26909 | 1283 uint8_t *d = dst; |
26910 | 1284 const uint16_t *s = (const uint16_t*)src; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1285 end = s + src_size/2; |
28276 | 1286 #if HAVE_MMX |
27744 | 1287 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1288 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); | |
28773
8a0785c19f48
Rewrite of rgb15to32 and rgb16to32 using fewer asm instructions
sdrik
parents:
28721
diff
changeset
|
1289 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1290 mm_end = end - 3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1291 while (s < mm_end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1292 { |
27744 | 1293 __asm__ volatile( |
29480 | 1294 PREFETCH" 32%1 \n\t" |
1295 "movq %1, %%mm0 \n\t" | |
1296 "movq %1, %%mm1 \n\t" | |
1297 "movq %1, %%mm2 \n\t" | |
1298 "pand %2, %%mm0 \n\t" | |
1299 "pand %3, %%mm1 \n\t" | |
1300 "pand %4, %%mm2 \n\t" | |
1301 "psllq $3, %%mm0 \n\t" | |
1302 "psrlq $3, %%mm1 \n\t" | |
1303 "psrlq $8, %%mm2 \n\t" | |
1304 PACK_RGB32 | |
1305 :"=m"(*d) | |
1306 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |
1307 :"memory"); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1308 d += 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1309 s += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1310 } |
27744 | 1311 __asm__ volatile(SFENCE:::"memory"); |
1312 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1313 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1314 while (s < end) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1315 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1316 register uint16_t bgr; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1317 bgr = *s++; |
29397 | 1318 #if HAVE_BIGENDIAN |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1319 *d++ = 255; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1320 *d++ = (bgr&0xF800)>>8; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1321 *d++ = (bgr&0x7E0)>>3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1322 *d++ = (bgr&0x1F)<<3; |
18861 | 1323 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1324 *d++ = (bgr&0x1F)<<3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1325 *d++ = (bgr&0x7E0)>>3; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1326 *d++ = (bgr&0xF800)>>8; |
28721
267dd38c800e
When converting from a non alpha format to an alpha format, defaults to all ones rather than all zeroes
sdrik
parents:
28323
diff
changeset
|
1327 *d++ = 255; |
18861 | 1328 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1329 } |
18861 | 1330 } |
1331 | |
1332 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1333 { | |
28968 | 1334 x86_reg idx = 15 - src_size; |
26910 | 1335 const uint8_t *s = src-idx; |
1336 uint8_t *d = dst-idx; | |
28276 | 1337 #if HAVE_MMX |
27744 | 1338 __asm__ volatile( |
29480 | 1339 "test %0, %0 \n\t" |
1340 "jns 2f \n\t" | |
1341 PREFETCH" (%1, %0) \n\t" | |
1342 "movq %3, %%mm7 \n\t" | |
1343 "pxor %4, %%mm7 \n\t" | |
1344 "movq %%mm7, %%mm6 \n\t" | |
1345 "pxor %5, %%mm7 \n\t" | |
1346 ASMALIGN(4) | |
1347 "1: \n\t" | |
1348 PREFETCH" 32(%1, %0) \n\t" | |
1349 "movq (%1, %0), %%mm0 \n\t" | |
1350 "movq 8(%1, %0), %%mm1 \n\t" | |
28276 | 1351 # if HAVE_MMX2 |
29480 | 1352 "pshufw $177, %%mm0, %%mm3 \n\t" |
1353 "pshufw $177, %%mm1, %%mm5 \n\t" | |
1354 "pand %%mm7, %%mm0 \n\t" | |
1355 "pand %%mm6, %%mm3 \n\t" | |
1356 "pand %%mm7, %%mm1 \n\t" | |
1357 "pand %%mm6, %%mm5 \n\t" | |
1358 "por %%mm3, %%mm0 \n\t" | |
1359 "por %%mm5, %%mm1 \n\t" | |
22991 | 1360 # else |
29480 | 1361 "movq %%mm0, %%mm2 \n\t" |
1362 "movq %%mm1, %%mm4 \n\t" | |
1363 "pand %%mm7, %%mm0 \n\t" | |
1364 "pand %%mm6, %%mm2 \n\t" | |
1365 "pand %%mm7, %%mm1 \n\t" | |
1366 "pand %%mm6, %%mm4 \n\t" | |
1367 "movq %%mm2, %%mm3 \n\t" | |
1368 "movq %%mm4, %%mm5 \n\t" | |
1369 "pslld $16, %%mm2 \n\t" | |
1370 "psrld $16, %%mm3 \n\t" | |
1371 "pslld $16, %%mm4 \n\t" | |
1372 "psrld $16, %%mm5 \n\t" | |
1373 "por %%mm2, %%mm0 \n\t" | |
1374 "por %%mm4, %%mm1 \n\t" | |
1375 "por %%mm3, %%mm0 \n\t" | |
1376 "por %%mm5, %%mm1 \n\t" | |
22991 | 1377 # endif |
29480 | 1378 MOVNTQ" %%mm0, (%2, %0) \n\t" |
1379 MOVNTQ" %%mm1, 8(%2, %0) \n\t" | |
1380 "add $16, %0 \n\t" | |
1381 "js 1b \n\t" | |
1382 SFENCE" \n\t" | |
1383 EMMS" \n\t" | |
1384 "2: \n\t" | |
1385 : "+&r"(idx) | |
1386 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) | |
1387 : "memory"); | |
18861 | 1388 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1389 for (; idx<15; idx+=4) { |
26910 | 1390 register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1391 v &= 0xff00ff; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1392 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1393 } |
18861 | 1394 } |
1395 | |
1396 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) | |
1397 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1398 unsigned i; |
28276 | 1399 #if HAVE_MMX |
28957 | 1400 x86_reg mmx_size= 23 - src_size; |
27744 | 1401 __asm__ volatile ( |
29480 | 1402 "test %%"REG_a", %%"REG_a" \n\t" |
1403 "jns 2f \n\t" | |
1404 "movq "MANGLE(mask24r)", %%mm5 \n\t" | |
1405 "movq "MANGLE(mask24g)", %%mm6 \n\t" | |
1406 "movq "MANGLE(mask24b)", %%mm7 \n\t" | |
1407 ASMALIGN(4) | |
1408 "1: \n\t" | |
1409 PREFETCH" 32(%1, %%"REG_a") \n\t" | |
1410 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |
1411 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG | |
1412 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B | |
1413 "psllq $16, %%mm0 \n\t" // 00 BGR BGR | |
1414 "pand %%mm5, %%mm0 \n\t" | |
1415 "pand %%mm6, %%mm1 \n\t" | |
1416 "pand %%mm7, %%mm2 \n\t" | |
1417 "por %%mm0, %%mm1 \n\t" | |
1418 "por %%mm2, %%mm1 \n\t" | |
1419 "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |
1420 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG | |
1421 "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B | |
1422 "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR | |
1423 "pand %%mm7, %%mm0 \n\t" | |
1424 "pand %%mm5, %%mm1 \n\t" | |
1425 "pand %%mm6, %%mm2 \n\t" | |
1426 "por %%mm0, %%mm1 \n\t" | |
1427 "por %%mm2, %%mm1 \n\t" | |
1428 "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B | |
1429 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R | |
1430 "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR | |
1431 "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG | |
1432 "pand %%mm6, %%mm0 \n\t" | |
1433 "pand %%mm7, %%mm1 \n\t" | |
1434 "pand %%mm5, %%mm2 \n\t" | |
1435 "por %%mm0, %%mm1 \n\t" | |
1436 "por %%mm2, %%mm1 \n\t" | |
1437 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" | |
1438 "add $24, %%"REG_a" \n\t" | |
1439 " js 1b \n\t" | |
1440 "2: \n\t" | |
1441 : "+a" (mmx_size) | |
1442 : "r" (src-mmx_size), "r"(dst-mmx_size) | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1443 ); |
18861 | 1444 |
27744 | 1445 __asm__ volatile(SFENCE:::"memory"); |
1446 __asm__ volatile(EMMS:::"memory"); | |
18861 | 1447 |
27158 | 1448 if (mmx_size==23) return; //finished, was multiple of 8 |
18861 | 1449 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1450 src+= src_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1451 dst+= src_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1452 src_size= 23-mmx_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1453 src-= src_size; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1454 dst-= src_size; |
18861 | 1455 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1456 for (i=0; i<src_size; i+=3) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1457 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1458 register uint8_t x; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1459 x = src[i + 2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1460 dst[i + 1] = src[i + 1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1461 dst[i + 2] = src[i + 0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1462 dst[i + 0] = x; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1463 } |
18861 | 1464 } |
1465 | |
1466 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1467 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1468 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
18861 | 1469 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1470 long y; |
28968 | 1471 const x86_reg chromWidth= width>>1; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1472 for (y=0; y<height; y++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1473 { |
28276 | 1474 #if HAVE_MMX |
25109 | 1475 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
27744 | 1476 __asm__ volatile( |
29480 | 1477 "xor %%"REG_a", %%"REG_a" \n\t" |
1478 ASMALIGN(4) | |
1479 "1: \n\t" | |
1480 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |
1481 PREFETCH" 32(%2, %%"REG_a") \n\t" | |
1482 PREFETCH" 32(%3, %%"REG_a") \n\t" | |
1483 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | |
1484 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1485 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | |
1486 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1487 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
18861 | 1488 |
29480 | 1489 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
1490 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | |
1491 "movq %%mm3, %%mm4 \n\t" // Y(0) | |
1492 "movq %%mm5, %%mm6 \n\t" // Y(8) | |
1493 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | |
1494 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | |
1495 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | |
1496 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | |
18861 | 1497 |
29480 | 1498 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" |
1499 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" | |
1500 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" | |
1501 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" | |
18861 | 1502 |
29480 | 1503 "add $8, %%"REG_a" \n\t" |
1504 "cmp %4, %%"REG_a" \n\t" | |
1505 " jb 1b \n\t" | |
1506 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | |
1507 : "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1508 ); |
18861 | 1509 #else |
1510 | |
28276 | 1511 #if ARCH_ALPHA && HAVE_MVI |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1512 #define pl2yuy2(n) \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1513 y1 = yc[n]; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1514 y2 = yc2[n]; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1515 u = uc[n]; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1516 v = vc[n]; \ |
27744 | 1517 __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ |
1518 __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ | |
1519 __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ | |
1520 __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1521 yuv1 = (u << 8) + (v << 24); \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1522 yuv2 = yuv1 + y2; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1523 yuv1 += y1; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1524 qdst[n] = yuv1; \ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1525 qdst2[n] = yuv2; |
18861 | 1526 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1527 int i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1528 uint64_t *qdst = (uint64_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1529 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1530 const uint32_t *yc = (uint32_t *) ysrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1531 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1532 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1533 for (i = 0; i < chromWidth; i += 8){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1534 uint64_t y1, y2, yuv1, yuv2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1535 uint64_t u, v; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1536 /* Prefetch */ |
27744 | 1537 __asm__("ldq $31,64(%0)" :: "r"(yc)); |
1538 __asm__("ldq $31,64(%0)" :: "r"(yc2)); | |
1539 __asm__("ldq $31,64(%0)" :: "r"(uc)); | |
1540 __asm__("ldq $31,64(%0)" :: "r"(vc)); | |
18861 | 1541 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1542 pl2yuy2(0); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1543 pl2yuy2(1); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1544 pl2yuy2(2); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1545 pl2yuy2(3); |
18861 | 1546 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1547 yc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1548 yc2 += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1549 uc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1550 vc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1551 qdst += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1552 qdst2 += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1553 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1554 y++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1555 ysrc += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1556 dst += dstStride; |
18861 | 1557 |
27688
49d5420c5698
Use HAVE_FAST_64BIT instead of nonstandard __WORDSIZE macro.
diego
parents:
27666
diff
changeset
|
1558 #elif HAVE_FAST_64BIT |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1559 int i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1560 uint64_t *ldst = (uint64_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1561 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1562 for (i = 0; i < chromWidth; i += 2){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1563 uint64_t k, l; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1564 k = yc[0] + (uc[0] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1565 (yc[1] << 16) + (vc[0] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1566 l = yc[2] + (uc[1] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1567 (yc[3] << 16) + (vc[1] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1568 *ldst++ = k + (l << 32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1569 yc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1570 uc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1571 vc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1572 } |
18861 | 1573 |
1574 #else | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1575 int i, *idst = (int32_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1576 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1577 for (i = 0; i < chromWidth; i++){ |
29397 | 1578 #if HAVE_BIGENDIAN |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1579 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1580 (yc[1] << 8) + (vc[0] << 0); |
18861 | 1581 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1582 *idst++ = yc[0] + (uc[0] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1583 (yc[1] << 16) + (vc[0] << 24); |
18861 | 1584 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1585 yc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1586 uc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1587 vc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1588 } |
18861 | 1589 #endif |
1590 #endif | |
25751 | 1591 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1592 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1593 usrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1594 vsrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1595 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1596 ysrc += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1597 dst += dstStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1598 } |
28276 | 1599 #if HAVE_MMX |
29480 | 1600 __asm__(EMMS" \n\t" |
1601 SFENCE" \n\t" | |
1602 :::"memory"); | |
18861 | 1603 #endif |
1604 } | |
1605 | |
1606 /** | |
27158 | 1607 * Height should be a multiple of 2 and width should be a multiple of 16. |
1608 * (If this is a problem for anyone then tell me, and I will fix it.) | |
18861 | 1609 */ |
1610 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1611 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1612 long lumStride, long chromStride, long dstStride) |
18861 | 1613 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1614 //FIXME interpolate chroma |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1615 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
18861 | 1616 } |
1617 | |
1618 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1619 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1620 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
18861 | 1621 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1622 long y; |
28968 | 1623 const x86_reg chromWidth= width>>1; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1624 for (y=0; y<height; y++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1625 { |
28276 | 1626 #if HAVE_MMX |
25109 | 1627 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
27744 | 1628 __asm__ volatile( |
29480 | 1629 "xor %%"REG_a", %%"REG_a" \n\t" |
1630 ASMALIGN(4) | |
1631 "1: \n\t" | |
1632 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |
1633 PREFETCH" 32(%2, %%"REG_a") \n\t" | |
1634 PREFETCH" 32(%3, %%"REG_a") \n\t" | |
1635 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | |
1636 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1637 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | |
1638 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1639 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
18861 | 1640 |
29480 | 1641 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) |
1642 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | |
1643 "movq %%mm0, %%mm4 \n\t" // Y(0) | |
1644 "movq %%mm2, %%mm6 \n\t" // Y(8) | |
1645 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) | |
1646 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) | |
1647 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) | |
1648 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) | |
18861 | 1649 |
29480 | 1650 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" |
1651 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" | |
1652 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" | |
1653 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" | |
18861 | 1654 |
29480 | 1655 "add $8, %%"REG_a" \n\t" |
1656 "cmp %4, %%"REG_a" \n\t" | |
1657 " jb 1b \n\t" | |
1658 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | |
1659 : "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1660 ); |
18861 | 1661 #else |
25109 | 1662 //FIXME adapt the Alpha ASM code from yv12->yuy2 |
18861 | 1663 |
27688
49d5420c5698
Use HAVE_FAST_64BIT instead of nonstandard __WORDSIZE macro.
diego
parents:
27666
diff
changeset
|
1664 #if HAVE_FAST_64BIT |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1665 int i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1666 uint64_t *ldst = (uint64_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1667 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1668 for (i = 0; i < chromWidth; i += 2){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1669 uint64_t k, l; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1670 k = uc[0] + (yc[0] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1671 (vc[0] << 16) + (yc[1] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1672 l = uc[1] + (yc[2] << 8) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1673 (vc[1] << 16) + (yc[3] << 24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1674 *ldst++ = k + (l << 32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1675 yc += 4; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1676 uc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1677 vc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1678 } |
18861 | 1679 |
1680 #else | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1681 int i, *idst = (int32_t *) dst; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1682 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1683 for (i = 0; i < chromWidth; i++){ |
29397 | 1684 #if HAVE_BIGENDIAN |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1685 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1686 (vc[0] << 8) + (yc[1] << 0); |
18861 | 1687 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1688 *idst++ = uc[0] + (yc[0] << 8) + |
27158 | 1689 (vc[0] << 16) + (yc[1] << 24); |
18861 | 1690 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1691 yc += 2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1692 uc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1693 vc++; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1694 } |
18861 | 1695 #endif |
1696 #endif | |
25751 | 1697 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1698 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1699 usrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1700 vsrc += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1701 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1702 ysrc += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1703 dst += dstStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1704 } |
28276 | 1705 #if HAVE_MMX |
29480 | 1706 __asm__(EMMS" \n\t" |
1707 SFENCE" \n\t" | |
1708 :::"memory"); | |
18861 | 1709 #endif |
1710 } | |
1711 | |
1712 /** | |
27158 | 1713 * Height should be a multiple of 2 and width should be a multiple of 16 |
1714 * (If this is a problem for anyone then tell me, and I will fix it.) | |
18861 | 1715 */ |
1716 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1717 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1718 long lumStride, long chromStride, long dstStride) |
18861 | 1719 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1720 //FIXME interpolate chroma |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1721 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); |
18861 | 1722 } |
1723 | |
1724 /** | |
25109 | 1725 * Width should be a multiple of 16. |
18861 | 1726 */ |
27495 | 1727 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
1728 long width, long height, | |
1729 long lumStride, long chromStride, long dstStride) | |
1730 { | |
1731 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); | |
1732 } | |
1733 | |
1734 /** | |
1735 * Width should be a multiple of 16. | |
1736 */ | |
18861 | 1737 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1738 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1739 long lumStride, long chromStride, long dstStride) |
18861 | 1740 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1741 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); |
18861 | 1742 } |
1743 | |
1744 /** | |
27158 | 1745 * Height should be a multiple of 2 and width should be a multiple of 16. |
1746 * (If this is a problem for anyone then tell me, and I will fix it.) | |
18861 | 1747 */ |
1748 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1749 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1750 long lumStride, long chromStride, long srcStride) |
18861 | 1751 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1752 long y; |
28968 | 1753 const x86_reg chromWidth= width>>1; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1754 for (y=0; y<height; y+=2) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1755 { |
28276 | 1756 #if HAVE_MMX |
27744 | 1757 __asm__ volatile( |
29480 | 1758 "xor %%"REG_a", %%"REG_a" \n\t" |
1759 "pcmpeqw %%mm7, %%mm7 \n\t" | |
1760 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
1761 ASMALIGN(4) | |
1762 "1: \n\t" | |
1763 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
1764 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1765 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1766 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) | |
1767 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) | |
1768 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) | |
1769 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) | |
1770 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
1771 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
1772 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1773 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
18861 | 1774 |
29480 | 1775 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" |
18861 | 1776 |
29480 | 1777 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) |
1778 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) | |
1779 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) | |
1780 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) | |
1781 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) | |
1782 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) | |
1783 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
1784 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
1785 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
1786 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
18861 | 1787 |
29480 | 1788 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" |
18861 | 1789 |
29480 | 1790 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
1791 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
1792 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
1793 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
1794 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
1795 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
1796 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
1797 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
18861 | 1798 |
29480 | 1799 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" |
1800 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" | |
18861 | 1801 |
29480 | 1802 "add $8, %%"REG_a" \n\t" |
1803 "cmp %4, %%"REG_a" \n\t" | |
1804 " jb 1b \n\t" | |
1805 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
1806 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1807 ); |
18861 | 1808 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1809 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1810 src += srcStride; |
18861 | 1811 |
27744 | 1812 __asm__ volatile( |
29480 | 1813 "xor %%"REG_a", %%"REG_a" \n\t" |
1814 ASMALIGN(4) | |
1815 "1: \n\t" | |
1816 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
1817 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1818 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) | |
1819 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) | |
1820 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) | |
1821 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
1822 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
1823 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
1824 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
1825 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
1826 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
18861 | 1827 |
29480 | 1828 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" |
1829 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" | |
18861 | 1830 |
29480 | 1831 "add $8, %%"REG_a" \n\t" |
1832 "cmp %4, %%"REG_a" \n\t" | |
1833 " jb 1b \n\t" | |
18861 | 1834 |
29480 | 1835 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
1836 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1837 ); |
18861 | 1838 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1839 long i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1840 for (i=0; i<chromWidth; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1841 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1842 ydst[2*i+0] = src[4*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1843 udst[i] = src[4*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1844 ydst[2*i+1] = src[4*i+2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1845 vdst[i] = src[4*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1846 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1847 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1848 src += srcStride; |
18861 | 1849 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1850 for (i=0; i<chromWidth; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1851 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1852 ydst[2*i+0] = src[4*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1853 ydst[2*i+1] = src[4*i+2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1854 } |
18861 | 1855 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1856 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1857 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1858 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1859 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1860 } |
28276 | 1861 #if HAVE_MMX |
29480 | 1862 __asm__ volatile(EMMS" \n\t" |
1863 SFENCE" \n\t" | |
1864 :::"memory"); | |
18861 | 1865 #endif |
1866 } | |
1867 | |
1868 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1869 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1870 long width, long height, long lumStride, long chromStride) |
18861 | 1871 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1872 /* Y Plane */ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1873 memcpy(ydst, ysrc, width*height); |
18861 | 1874 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1875 /* XXX: implement upscaling for U,V */ |
18861 | 1876 } |
1877 | |
1878 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) | |
1879 { | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1880 long x,y; |
23129 | 1881 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1882 dst[0]= src[0]; |
23129 | 1883 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1884 // first line |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1885 for (x=0; x<srcWidth-1; x++){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1886 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1887 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1888 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1889 dst[2*srcWidth-1]= src[srcWidth-1]; |
23129 | 1890 |
29480 | 1891 dst+= dstStride; |
18861 | 1892 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1893 for (y=1; y<srcHeight; y++){ |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
1894 #if HAVE_MMX2 || HAVE_AMD3DNOW |
28957 | 1895 const x86_reg mmxSize= srcWidth&~15; |
27744 | 1896 __asm__ volatile( |
29480 | 1897 "mov %4, %%"REG_a" \n\t" |
1898 "1: \n\t" | |
1899 "movq (%0, %%"REG_a"), %%mm0 \n\t" | |
1900 "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
1901 "movq 1(%0, %%"REG_a"), %%mm2 \n\t" | |
1902 "movq 1(%1, %%"REG_a"), %%mm3 \n\t" | |
1903 "movq -1(%0, %%"REG_a"), %%mm4 \n\t" | |
1904 "movq -1(%1, %%"REG_a"), %%mm5 \n\t" | |
1905 PAVGB" %%mm0, %%mm5 \n\t" | |
1906 PAVGB" %%mm0, %%mm3 \n\t" | |
1907 PAVGB" %%mm0, %%mm5 \n\t" | |
1908 PAVGB" %%mm0, %%mm3 \n\t" | |
1909 PAVGB" %%mm1, %%mm4 \n\t" | |
1910 PAVGB" %%mm1, %%mm2 \n\t" | |
1911 PAVGB" %%mm1, %%mm4 \n\t" | |
1912 PAVGB" %%mm1, %%mm2 \n\t" | |
1913 "movq %%mm5, %%mm7 \n\t" | |
1914 "movq %%mm4, %%mm6 \n\t" | |
1915 "punpcklbw %%mm3, %%mm5 \n\t" | |
1916 "punpckhbw %%mm3, %%mm7 \n\t" | |
1917 "punpcklbw %%mm2, %%mm4 \n\t" | |
1918 "punpckhbw %%mm2, %%mm6 \n\t" | |
18861 | 1919 #if 1 |
29480 | 1920 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" |
1921 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" | |
1922 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" | |
1923 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" | |
18861 | 1924 #else |
29480 | 1925 "movq %%mm5, (%2, %%"REG_a", 2) \n\t" |
1926 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" | |
1927 "movq %%mm4, (%3, %%"REG_a", 2) \n\t" | |
1928 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" | |
18861 | 1929 #endif |
29480 | 1930 "add $8, %%"REG_a" \n\t" |
1931 " js 1b \n\t" | |
1932 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), | |
1933 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), | |
1934 "g" (-mmxSize) | |
1935 : "%"REG_a | |
18861 | 1936 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1937 ); |
18861 | 1938 #else |
28968 | 1939 const x86_reg mmxSize=1; |
18861 | 1940 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1941 dst[0 ]= (3*src[0] + src[srcStride])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1942 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; |
18861 | 1943 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1944 for (x=mmxSize-1; x<srcWidth-1; x++){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1945 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1946 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1947 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1948 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1949 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1950 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1951 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; |
18861 | 1952 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1953 dst+=dstStride*2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1954 src+=srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1955 } |
23129 | 1956 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1957 // last line |
18861 | 1958 #if 1 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1959 dst[0]= src[0]; |
23129 | 1960 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1961 for (x=0; x<srcWidth-1; x++){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1962 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1963 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1964 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1965 dst[2*srcWidth-1]= src[srcWidth-1]; |
18861 | 1966 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1967 for (x=0; x<srcWidth; x++){ |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1968 dst[2*x+0]= |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1969 dst[2*x+1]= src[x]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1970 } |
18861 | 1971 #endif |
1972 | |
28276 | 1973 #if HAVE_MMX |
29480 | 1974 __asm__ volatile(EMMS" \n\t" |
1975 SFENCE" \n\t" | |
1976 :::"memory"); | |
18861 | 1977 #endif |
1978 } | |
1979 | |
1980 /** | |
27158 | 1981 * Height should be a multiple of 2 and width should be a multiple of 16. |
1982 * (If this is a problem for anyone then tell me, and I will fix it.) | |
1983 * Chrominance data is only taken from every second line, others are ignored. | |
25109 | 1984 * FIXME: Write HQ version. |
18861 | 1985 */ |
1986 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1987 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1988 long lumStride, long chromStride, long srcStride) |
18861 | 1989 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1990 long y; |
28968 | 1991 const x86_reg chromWidth= width>>1; |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1992 for (y=0; y<height; y+=2) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
1993 { |
28276 | 1994 #if HAVE_MMX |
27744 | 1995 __asm__ volatile( |
29480 | 1996 "xor %%"REG_a", %%"REG_a" \n\t" |
1997 "pcmpeqw %%mm7, %%mm7 \n\t" | |
1998 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
1999 ASMALIGN(4) | |
2000 "1: \n\t" | |
2001 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
2002 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) | |
2003 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) | |
2004 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) | |
2005 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) | |
2006 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) | |
2007 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) | |
2008 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
2009 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
2010 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
2011 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
18861 | 2012 |
29480 | 2013 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" |
18861 | 2014 |
29480 | 2015 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) |
2016 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) | |
2017 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) | |
2018 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) | |
2019 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) | |
2020 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) | |
2021 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
2022 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
2023 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
2024 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
18861 | 2025 |
29480 | 2026 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" |
18861 | 2027 |
29480 | 2028 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) |
2029 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
2030 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
2031 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
2032 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
2033 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
2034 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
2035 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
18861 | 2036 |
29480 | 2037 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" |
2038 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" | |
18861 | 2039 |
29480 | 2040 "add $8, %%"REG_a" \n\t" |
2041 "cmp %4, %%"REG_a" \n\t" | |
2042 " jb 1b \n\t" | |
2043 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
2044 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2045 ); |
18861 | 2046 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2047 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2048 src += srcStride; |
18861 | 2049 |
27744 | 2050 __asm__ volatile( |
29480 | 2051 "xor %%"REG_a", %%"REG_a" \n\t" |
2052 ASMALIGN(4) | |
2053 "1: \n\t" | |
2054 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
2055 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
2056 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) | |
2057 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) | |
2058 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) | |
2059 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
2060 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
2061 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
2062 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
2063 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
2064 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
18861 | 2065 |
29480 | 2066 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" |
2067 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" | |
18861 | 2068 |
29480 | 2069 "add $8, %%"REG_a" \n\t" |
2070 "cmp %4, %%"REG_a" \n\t" | |
2071 " jb 1b \n\t" | |
18861 | 2072 |
29480 | 2073 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
2074 : "memory", "%"REG_a | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2075 ); |
18861 | 2076 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2077 long i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2078 for (i=0; i<chromWidth; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2079 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2080 udst[i] = src[4*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2081 ydst[2*i+0] = src[4*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2082 vdst[i] = src[4*i+2]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2083 ydst[2*i+1] = src[4*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2084 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2085 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2086 src += srcStride; |
18861 | 2087 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2088 for (i=0; i<chromWidth; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2089 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2090 ydst[2*i+0] = src[4*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2091 ydst[2*i+1] = src[4*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2092 } |
18861 | 2093 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2094 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2095 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2096 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2097 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2098 } |
28276 | 2099 #if HAVE_MMX |
29480 | 2100 __asm__ volatile(EMMS" \n\t" |
2101 SFENCE" \n\t" | |
2102 :::"memory"); | |
18861 | 2103 #endif |
2104 } | |
2105 | |
2106 /** | |
27158 | 2107 * Height should be a multiple of 2 and width should be a multiple of 2. |
2108 * (If this is a problem for anyone then tell me, and I will fix it.) | |
2109 * Chrominance data is only taken from every second line, | |
25109 | 2110 * others are ignored in the C version. |
2111 * FIXME: Write HQ version. | |
18861 | 2112 */ |
2113 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2114 long width, long height, |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2115 long lumStride, long chromStride, long srcStride) |
18861 | 2116 { |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2117 long y; |
28968 | 2118 const x86_reg chromWidth= width>>1; |
28276 | 2119 #if HAVE_MMX |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2120 for (y=0; y<height-2; y+=2) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2121 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2122 long i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2123 for (i=0; i<2; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2124 { |
27744 | 2125 __asm__ volatile( |
29480 | 2126 "mov %2, %%"REG_a" \n\t" |
2127 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |
2128 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |
2129 "pxor %%mm7, %%mm7 \n\t" | |
2130 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |
2131 ASMALIGN(4) | |
2132 "1: \n\t" | |
2133 PREFETCH" 64(%0, %%"REG_d") \n\t" | |
2134 "movd (%0, %%"REG_d"), %%mm0 \n\t" | |
2135 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" | |
2136 "punpcklbw %%mm7, %%mm0 \n\t" | |
2137 "punpcklbw %%mm7, %%mm1 \n\t" | |
2138 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" | |
2139 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" | |
2140 "punpcklbw %%mm7, %%mm2 \n\t" | |
2141 "punpcklbw %%mm7, %%mm3 \n\t" | |
2142 "pmaddwd %%mm6, %%mm0 \n\t" | |
2143 "pmaddwd %%mm6, %%mm1 \n\t" | |
2144 "pmaddwd %%mm6, %%mm2 \n\t" | |
2145 "pmaddwd %%mm6, %%mm3 \n\t" | |
18861 | 2146 #ifndef FAST_BGR2YV12 |
29480 | 2147 "psrad $8, %%mm0 \n\t" |
2148 "psrad $8, %%mm1 \n\t" | |
2149 "psrad $8, %%mm2 \n\t" | |
2150 "psrad $8, %%mm3 \n\t" | |
18861 | 2151 #endif |
29480 | 2152 "packssdw %%mm1, %%mm0 \n\t" |
2153 "packssdw %%mm3, %%mm2 \n\t" | |
2154 "pmaddwd %%mm5, %%mm0 \n\t" | |
2155 "pmaddwd %%mm5, %%mm2 \n\t" | |
2156 "packssdw %%mm2, %%mm0 \n\t" | |
2157 "psraw $7, %%mm0 \n\t" | |
18861 | 2158 |
29480 | 2159 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2160 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" | |
2161 "punpcklbw %%mm7, %%mm4 \n\t" | |
2162 "punpcklbw %%mm7, %%mm1 \n\t" | |
2163 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2164 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" | |
2165 "punpcklbw %%mm7, %%mm2 \n\t" | |
2166 "punpcklbw %%mm7, %%mm3 \n\t" | |
2167 "pmaddwd %%mm6, %%mm4 \n\t" | |
2168 "pmaddwd %%mm6, %%mm1 \n\t" | |
2169 "pmaddwd %%mm6, %%mm2 \n\t" | |
2170 "pmaddwd %%mm6, %%mm3 \n\t" | |
18861 | 2171 #ifndef FAST_BGR2YV12 |
29480 | 2172 "psrad $8, %%mm4 \n\t" |
2173 "psrad $8, %%mm1 \n\t" | |
2174 "psrad $8, %%mm2 \n\t" | |
2175 "psrad $8, %%mm3 \n\t" | |
18861 | 2176 #endif |
29480 | 2177 "packssdw %%mm1, %%mm4 \n\t" |
2178 "packssdw %%mm3, %%mm2 \n\t" | |
2179 "pmaddwd %%mm5, %%mm4 \n\t" | |
2180 "pmaddwd %%mm5, %%mm2 \n\t" | |
2181 "add $24, %%"REG_d" \n\t" | |
2182 "packssdw %%mm2, %%mm4 \n\t" | |
2183 "psraw $7, %%mm4 \n\t" | |
18861 | 2184 |
29480 | 2185 "packuswb %%mm4, %%mm0 \n\t" |
2186 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" | |
18861 | 2187 |
29480 | 2188 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" |
2189 "add $8, %%"REG_a" \n\t" | |
2190 " js 1b \n\t" | |
2191 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) | |
2192 : "%"REG_a, "%"REG_d | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2193 ); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2194 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2195 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2196 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2197 src -= srcStride*2; |
27744 | 2198 __asm__ volatile( |
29480 | 2199 "mov %4, %%"REG_a" \n\t" |
2200 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |
2201 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |
2202 "pxor %%mm7, %%mm7 \n\t" | |
2203 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |
2204 "add %%"REG_d", %%"REG_d" \n\t" | |
2205 ASMALIGN(4) | |
2206 "1: \n\t" | |
2207 PREFETCH" 64(%0, %%"REG_d") \n\t" | |
2208 PREFETCH" 64(%1, %%"REG_d") \n\t" | |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
2209 #if HAVE_MMX2 || HAVE_AMD3DNOW |
29480 | 2210 "movq (%0, %%"REG_d"), %%mm0 \n\t" |
2211 "movq (%1, %%"REG_d"), %%mm1 \n\t" | |
2212 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" | |
2213 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" | |
2214 PAVGB" %%mm1, %%mm0 \n\t" | |
2215 PAVGB" %%mm3, %%mm2 \n\t" | |
2216 "movq %%mm0, %%mm1 \n\t" | |
2217 "movq %%mm2, %%mm3 \n\t" | |
2218 "psrlq $24, %%mm0 \n\t" | |
2219 "psrlq $24, %%mm2 \n\t" | |
2220 PAVGB" %%mm1, %%mm0 \n\t" | |
2221 PAVGB" %%mm3, %%mm2 \n\t" | |
2222 "punpcklbw %%mm7, %%mm0 \n\t" | |
2223 "punpcklbw %%mm7, %%mm2 \n\t" | |
18861 | 2224 #else |
29480 | 2225 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
2226 "movd (%1, %%"REG_d"), %%mm1 \n\t" | |
2227 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" | |
2228 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" | |
2229 "punpcklbw %%mm7, %%mm0 \n\t" | |
2230 "punpcklbw %%mm7, %%mm1 \n\t" | |
2231 "punpcklbw %%mm7, %%mm2 \n\t" | |
2232 "punpcklbw %%mm7, %%mm3 \n\t" | |
2233 "paddw %%mm1, %%mm0 \n\t" | |
2234 "paddw %%mm3, %%mm2 \n\t" | |
2235 "paddw %%mm2, %%mm0 \n\t" | |
2236 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" | |
2237 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" | |
2238 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" | |
2239 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" | |
2240 "punpcklbw %%mm7, %%mm4 \n\t" | |
2241 "punpcklbw %%mm7, %%mm1 \n\t" | |
2242 "punpcklbw %%mm7, %%mm2 \n\t" | |
2243 "punpcklbw %%mm7, %%mm3 \n\t" | |
2244 "paddw %%mm1, %%mm4 \n\t" | |
2245 "paddw %%mm3, %%mm2 \n\t" | |
2246 "paddw %%mm4, %%mm2 \n\t" | |
2247 "psrlw $2, %%mm0 \n\t" | |
2248 "psrlw $2, %%mm2 \n\t" | |
18861 | 2249 #endif |
29480 | 2250 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" |
2251 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |
18861 | 2252 |
29480 | 2253 "pmaddwd %%mm0, %%mm1 \n\t" |
2254 "pmaddwd %%mm2, %%mm3 \n\t" | |
2255 "pmaddwd %%mm6, %%mm0 \n\t" | |
2256 "pmaddwd %%mm6, %%mm2 \n\t" | |
18861 | 2257 #ifndef FAST_BGR2YV12 |
29480 | 2258 "psrad $8, %%mm0 \n\t" |
2259 "psrad $8, %%mm1 \n\t" | |
2260 "psrad $8, %%mm2 \n\t" | |
2261 "psrad $8, %%mm3 \n\t" | |
18861 | 2262 #endif |
29480 | 2263 "packssdw %%mm2, %%mm0 \n\t" |
2264 "packssdw %%mm3, %%mm1 \n\t" | |
2265 "pmaddwd %%mm5, %%mm0 \n\t" | |
2266 "pmaddwd %%mm5, %%mm1 \n\t" | |
2267 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
2268 "psraw $7, %%mm0 \n\t" | |
18861 | 2269 |
28323
99c49467ebbc
HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
diego
parents:
28276
diff
changeset
|
2270 #if HAVE_MMX2 || HAVE_AMD3DNOW |
29480 | 2271 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
2272 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2273 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2274 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" | |
2275 PAVGB" %%mm1, %%mm4 \n\t" | |
2276 PAVGB" %%mm3, %%mm2 \n\t" | |
2277 "movq %%mm4, %%mm1 \n\t" | |
2278 "movq %%mm2, %%mm3 \n\t" | |
2279 "psrlq $24, %%mm4 \n\t" | |
2280 "psrlq $24, %%mm2 \n\t" | |
2281 PAVGB" %%mm1, %%mm4 \n\t" | |
2282 PAVGB" %%mm3, %%mm2 \n\t" | |
2283 "punpcklbw %%mm7, %%mm4 \n\t" | |
2284 "punpcklbw %%mm7, %%mm2 \n\t" | |
18861 | 2285 #else |
29480 | 2286 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2287 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2288 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" | |
2289 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" | |
2290 "punpcklbw %%mm7, %%mm4 \n\t" | |
2291 "punpcklbw %%mm7, %%mm1 \n\t" | |
2292 "punpcklbw %%mm7, %%mm2 \n\t" | |
2293 "punpcklbw %%mm7, %%mm3 \n\t" | |
2294 "paddw %%mm1, %%mm4 \n\t" | |
2295 "paddw %%mm3, %%mm2 \n\t" | |
2296 "paddw %%mm2, %%mm4 \n\t" | |
2297 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" | |
2298 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" | |
2299 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" | |
2300 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" | |
2301 "punpcklbw %%mm7, %%mm5 \n\t" | |
2302 "punpcklbw %%mm7, %%mm1 \n\t" | |
2303 "punpcklbw %%mm7, %%mm2 \n\t" | |
2304 "punpcklbw %%mm7, %%mm3 \n\t" | |
2305 "paddw %%mm1, %%mm5 \n\t" | |
2306 "paddw %%mm3, %%mm2 \n\t" | |
2307 "paddw %%mm5, %%mm2 \n\t" | |
2308 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |
2309 "psrlw $2, %%mm4 \n\t" | |
2310 "psrlw $2, %%mm2 \n\t" | |
18861 | 2311 #endif |
29480 | 2312 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" |
2313 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |
18861 | 2314 |
29480 | 2315 "pmaddwd %%mm4, %%mm1 \n\t" |
2316 "pmaddwd %%mm2, %%mm3 \n\t" | |
2317 "pmaddwd %%mm6, %%mm4 \n\t" | |
2318 "pmaddwd %%mm6, %%mm2 \n\t" | |
18861 | 2319 #ifndef FAST_BGR2YV12 |
29480 | 2320 "psrad $8, %%mm4 \n\t" |
2321 "psrad $8, %%mm1 \n\t" | |
2322 "psrad $8, %%mm2 \n\t" | |
2323 "psrad $8, %%mm3 \n\t" | |
18861 | 2324 #endif |
29480 | 2325 "packssdw %%mm2, %%mm4 \n\t" |
2326 "packssdw %%mm3, %%mm1 \n\t" | |
2327 "pmaddwd %%mm5, %%mm4 \n\t" | |
2328 "pmaddwd %%mm5, %%mm1 \n\t" | |
2329 "add $24, %%"REG_d" \n\t" | |
2330 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
2331 "psraw $7, %%mm4 \n\t" | |
18861 | 2332 |
29480 | 2333 "movq %%mm0, %%mm1 \n\t" |
2334 "punpckldq %%mm4, %%mm0 \n\t" | |
2335 "punpckhdq %%mm4, %%mm1 \n\t" | |
2336 "packsswb %%mm1, %%mm0 \n\t" | |
2337 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" | |
2338 "movd %%mm0, (%2, %%"REG_a") \n\t" | |
2339 "punpckhdq %%mm0, %%mm0 \n\t" | |
2340 "movd %%mm0, (%3, %%"REG_a") \n\t" | |
2341 "add $4, %%"REG_a" \n\t" | |
2342 " js 1b \n\t" | |
2343 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) | |
2344 : "%"REG_a, "%"REG_d | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2345 ); |
18861 | 2346 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2347 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2348 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2349 src += srcStride*2; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2350 } |
18861 | 2351 |
29480 | 2352 __asm__ volatile(EMMS" \n\t" |
2353 SFENCE" \n\t" | |
2354 :::"memory"); | |
18861 | 2355 #else |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2356 y=0; |
18861 | 2357 #endif |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2358 for (; y<height; y+=2) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2359 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2360 long i; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2361 for (i=0; i<chromWidth; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2362 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2363 unsigned int b = src[6*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2364 unsigned int g = src[6*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2365 unsigned int r = src[6*i+2]; |
18861 | 2366 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2367 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2368 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2369 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; |
18861 | 2370 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2371 udst[i] = U; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2372 vdst[i] = V; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2373 ydst[2*i] = Y; |
18861 | 2374 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2375 b = src[6*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2376 g = src[6*i+4]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2377 r = src[6*i+5]; |
18861 | 2378 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2379 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2380 ydst[2*i+1] = Y; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2381 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2382 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2383 src += srcStride; |
18861 | 2384 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2385 for (i=0; i<chromWidth; i++) |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2386 { |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2387 unsigned int b = src[6*i+0]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2388 unsigned int g = src[6*i+1]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2389 unsigned int r = src[6*i+2]; |
18861 | 2390 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2391 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
18861 | 2392 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2393 ydst[2*i] = Y; |
18861 | 2394 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2395 b = src[6*i+3]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2396 g = src[6*i+4]; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2397 r = src[6*i+5]; |
18861 | 2398 |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2399 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2400 ydst[2*i+1] = Y; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2401 } |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2402 udst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2403 vdst += chromStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2404 ydst += lumStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2405 src += srcStride; |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
2406 } |
18861 | 2407 } |
2408 | |
27703
3815517f49d4
Mark variation-specific interleaveBytes static.
flameeyes
parents:
27690
diff
changeset
|
/**
 * Interleave two byte planes row by row:
 * dest[2*i] = src1[i] and dest[2*i+1] = src2[i] for every i in [0, width).
 *
 * @param src1       first input plane (supplies the even output bytes)
 * @param src2       second input plane (supplies the odd output bytes)
 * @param dest       output plane; 2*width bytes are written per row
 * @param width      number of bytes consumed from each input per row
 * @param height     number of rows to process
 * @param src1Stride byte step from one src1 row to the next
 * @param src2Stride byte step from one src2 row to the next
 * @param dstStride  byte step from one dest row to the next
 */
static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
                                    long width, long height, long src1Stride,
                                    long src2Stride, long dstStride){
    long h;

    for (h=0; h < height; h++)
    {
        /* index of the first byte that the scalar loop still has to handle */
        long w = 0;

#if HAVE_MMX
        /*
         * Both SIMD loops below are do-while loops: they always consume one
         * group of 16 input bytes before testing the index against width-15
         * with an unsigned branch (jb). For width < 16 that would read and
         * write past the row, and for width < 15 the bound wraps to a huge
         * unsigned value, so only enter them when a full group exists.
         */
#if HAVE_SSE2
        /* movdqa/movntdq fault on addresses that are not 16-byte aligned, so
         * rows that are not aligned are left to the scalar loop. */
        if (width >= 16 &&
            !(((uintptr_t)src1 | (uintptr_t)src2 | (uintptr_t)dest) & 15))
        {
            __asm__(
                "xor %%"REG_a", %%"REG_a" \n\t"
                "1: \n\t"
                PREFETCH" 64(%1, %%"REG_a") \n\t"
                PREFETCH" 64(%2, %%"REG_a") \n\t"
                "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
                "movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
                "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
                "punpcklbw %%xmm2, %%xmm0 \n\t"
                "punpckhbw %%xmm2, %%xmm1 \n\t"
                "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
                "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
                "add $16, %%"REG_a" \n\t"
                "cmp %3, %%"REG_a" \n\t"
                " jb 1b \n\t"
                ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
                : "memory", "%"REG_a""
            );
            /* the asm handled every complete group of 16 bytes */
            w = width & ~15;
        }
#else
        if (width >= 16)
        {
            __asm__(
                "xor %%"REG_a", %%"REG_a" \n\t"
                "1: \n\t"
                PREFETCH" 64(%1, %%"REG_a") \n\t"
                PREFETCH" 64(%2, %%"REG_a") \n\t"
                "movq (%1, %%"REG_a"), %%mm0 \n\t"
                "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq (%2, %%"REG_a"), %%mm4 \n\t"
                "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
                "punpcklbw %%mm4, %%mm0 \n\t"
                "punpckhbw %%mm4, %%mm1 \n\t"
                "punpcklbw %%mm5, %%mm2 \n\t"
                "punpckhbw %%mm5, %%mm3 \n\t"
                MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
                MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
                MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
                MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
                "add $16, %%"REG_a" \n\t"
                "cmp %3, %%"REG_a" \n\t"
                " jb 1b \n\t"
                ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
                : "memory", "%"REG_a
            );
            /* the asm handled every complete group of 16 bytes */
            w = width & ~15;
        }
#endif
#endif
        /* scalar path: the whole row without SIMD, otherwise the remainder */
        for (; w < width; w++)
        {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
#if HAVE_MMX
    /* emitted once after all rows; EMMS/SFENCE are macros defined elsewhere */
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
2489 | |
/**
 * Double one plane in both directions: each of the w source bytes of a row is
 * written twice (2*w output bytes), and destination row y reads source row
 * y>>1, so every source row feeds two destination rows.
 *
 * With HAVE_MMX the bulk of each row goes through MMX registers; this helper
 * does not emit EMMS/SFENCE itself, the caller does that once at the end.
 *
 * @param src       source plane
 * @param dst       destination plane
 * @param w         source bytes to read per row
 * @param h         destination rows to write
 * @param srcStride byte step between source rows
 * @param dstStride byte step between destination rows
 */
static inline void RENAME(vu9_double_plane)(const uint8_t *src, uint8_t *dst,
                                            long w, long h,
                                            long srcStride, long dstStride)
{
    x86_reg y;
    long x;

    for (y=0;y<h;y++){
        const uint8_t* s=src+srcStride*(y>>1);
        uint8_t* d=dst+dstStride*y;
        x=0;
#if HAVE_MMX
        /* 32 source bytes in, 64 destination bytes out per iteration */
        for (;x<w-31;x+=32)
        {
            __asm__ volatile(
                PREFETCH" 32%1 \n\t"
                "movq %1, %%mm0 \n\t"
                "movq 8%1, %%mm2 \n\t"
                "movq 16%1, %%mm4 \n\t"
                "movq 24%1, %%mm6 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq %%mm4, %%mm5 \n\t"
                "movq %%mm6, %%mm7 \n\t"
                "punpcklbw %%mm0, %%mm0 \n\t"
                "punpckhbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm3, %%mm3 \n\t"
                "punpcklbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"
                "punpcklbw %%mm6, %%mm6 \n\t"
                "punpckhbw %%mm7, %%mm7 \n\t"
                MOVNTQ" %%mm0, %0 \n\t"
                MOVNTQ" %%mm1, 8%0 \n\t"
                MOVNTQ" %%mm2, 16%0 \n\t"
                MOVNTQ" %%mm3, 24%0 \n\t"
                MOVNTQ" %%mm4, 32%0 \n\t"
                MOVNTQ" %%mm5, 40%0 \n\t"
                MOVNTQ" %%mm6, 48%0 \n\t"
                MOVNTQ" %%mm7, 56%0"
                :"=m"(d[2*x])
                :"m"(s[x])
                :"memory");
        }
#endif
        /* scalar path: whole row without MMX, otherwise the last w%32 bytes */
        for (;x<w;x++) d[2*x]=d[2*x+1]=s[x];
    }
}

/**
 * Upsample two planes by a factor of two horizontally and vertically.
 *
 * Each plane is processed independently: width/2 bytes are read per source
 * row and each is duplicated, and height/2 destination rows are produced,
 * with destination row y taken from source row y>>1.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       destination for the upsampled src1
 * @param dst2       destination for the upsampled src2
 * @param width      twice the number of source bytes read per row
 * @param height     twice the number of destination rows written
 * @param srcStride1 byte step between src1 rows
 * @param srcStride2 byte step between src2 rows
 * @param dstStride1 byte step between dst1 rows
 * @param dstStride2 byte step between dst2 rows
 */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
                                       long width, long height,
                                       long srcStride1, long srcStride2,
                                       long dstStride1, long dstStride2)
{
    long w,h;
    w=width/2; h=height/2;
#if HAVE_MMX
    /* hint the second row of each source plane before starting */
    __asm__ volatile(
        PREFETCH" %0 \n\t"
        PREFETCH" %1 \n\t"
        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif
    /* the two planes used to be handled by two copy-pasted loops */
    RENAME(vu9_double_plane)(src1, dst1, w, h, srcStride1, dstStride1);
    RENAME(vu9_double_plane)(src2, dst2, w, h, srcStride2, dstStride2);
#if HAVE_MMX
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
2593 | |
/**
 * Pack three planes into one interleaved output plane.
 *
 * For every output row and every index x in [0, width/2), 8 bytes are
 * written: four consecutive bytes of src1 (starting at 4*x) in the even
 * positions, and src2[x], src3[x], src2[x], src3[x] in the odd positions.
 * Output row y reads src1 row y and row y>>2 of both src2 and src3.
 *
 * @param src1       plane supplying the even output bytes (Y in the asm notes)
 * @param src2       plane supplying output bytes 1 and 5 of a group (U)
 * @param src3       plane supplying output bytes 3 and 7 of a group (V)
 * @param dst        interleaved output plane
 * @param width      twice the number of 8-byte groups written per row
 * @param height     number of output rows
 * @param srcStride1 byte step between src1 rows
 * @param srcStride2 byte step between src2 rows
 * @param srcStride3 byte step between src3 rows
 * @param dstStride  byte step between output rows
 */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
                                        long width, long height,
                                        long srcStride1, long srcStride2,
                                        long srcStride3, long dstStride)
{
    const long groups = width/2;
    const long rows   = height;
    x86_reg col;
    long row;

    for (row = 0; row < rows; row++) {
        const uint8_t *lumaLine = src1 + srcStride1*row;
        const uint8_t *uLine    = src2 + srcStride2*(row>>2);
        const uint8_t *vLine    = src3 + srcStride3*(row>>2);
        uint8_t       *outLine  = dst  + dstStride*row;

        col = 0;
#if HAVE_MMX
        /* eight groups (64 output bytes) per iteration */
        while (col < groups-7) {
            __asm__ volatile(
                PREFETCH" 32(%1, %0) \n\t"
                PREFETCH" 32(%2, %0) \n\t"
                PREFETCH" 32(%3, %0) \n\t"
                "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
                "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
                "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
                "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
                "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */
                "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */
                "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */
                "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */
                "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */
                "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */

                "movq %%mm1, %%mm6 \n\t"
                "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/
                "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
                "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
                MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"

                "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/
                "movq 8(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/
                "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/
                MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"

                "movq %%mm4, %%mm6 \n\t"
                "movq 16(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm5, %%mm4 \n\t"
                "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/
                "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/
                MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"

                "punpckhbw %%mm5, %%mm6 \n\t"
                "movq 24(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/
                "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/
                MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"

                : "+r" (col)
                : "r"(lumaLine), "r" (uLine), "r"(vLine), "r"(outLine)
                :"memory");
            col += 8;
        }
#endif
        /* scalar path: one 8-byte group per iteration */
        while (col < groups) {
            const long lumaIdx = col<<2;
            uint8_t *out = outLine + 8*col;

            out[0] = lumaLine[lumaIdx];
            out[1] = uLine[col];
            out[2] = lumaLine[lumaIdx+1];
            out[3] = vLine[col];
            out[4] = lumaLine[lumaIdx+2];
            out[5] = uLine[col];
            out[6] = lumaLine[lumaIdx+3];
            out[7] = vLine[col];
            col++;
        }
    }
#if HAVE_MMX
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
22960 | 2685 |
/**
 * Copy every second byte of src: dst[i] = src[2*i] for i in [0, count).
 *
 * Both pointers are first advanced to the end of their ranges and count is
 * negated, so the loops run with a negative index counting up to zero.
 *
 * @param src   input, 2*count bytes are addressed
 * @param dst   output, count bytes are written
 * @param count number of output bytes
 */
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
    dst += count;
    src += 2*count;
    count= - count;

#if HAVE_MMX
    if(count <= -16){
        /* bias the index so the sign flag ends the loop while a full group
         * of 16 outputs is still available */
        count += 15;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -30(%1, %0, 2), %%mm0 \n\t"
            "movq -22(%1, %0, 2), %%mm1 \n\t"
            "movq -14(%1, %0, 2), %%mm2 \n\t"
            "movq -6(%1, %0, 2), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0,-15(%2, %0) \n\t"
            MOVNTQ" %%mm2,- 7(%2, %0) \n\t"
            "add $16, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst)
            /* the asm loads from src and stores to dst through plain register
             * operands, so the compiler must be told memory is accessed */
            : "memory"
        );
        count -= 15;
    }
#endif
    /* scalar path: everything without MMX, otherwise the last count%16 bytes */
    while(count<0){
        dst[count]= src[2*count];
        count++;
    }
}
2724 | |
/**
 * Split every second byte of src into two planes:
 * dst0[i] = src[4*i] and dst1[i] = src[4*i+2] for i in [0, count).
 *
 * All pointers are first advanced to the end of their ranges and count is
 * negated, so the loops run with a negative index counting up to zero.
 *
 * @param src   input, 4*count bytes are addressed
 * @param dst0  output for the bytes at offset 0 of each 4-byte group
 * @param dst1  output for the bytes at offset 2 of each 4-byte group
 * @param count number of bytes written to each output
 */
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0+= count;
    dst1+= count;
    src += 4*count;
    count= - count;
#if HAVE_MMX
    if(count <= -8){
        /* bias the index so the sign flag ends the loop while a full group
         * of 8 outputs per plane is still available */
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"
            "packuswb %%mm3, %%mm1 \n\t"
            MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
            /* the asm loads from src and stores to dst0/dst1 through plain
             * register operands, so the compiler must be told memory is
             * accessed */
            : "memory"
        );
        count -= 7;
    }
#endif
    /* scalar path: everything without MMX, otherwise the last count%8 items */
    while(count<0){
        dst0[count]= src[4*count+0];
        dst1[count]= src[4*count+2];
        count++;
    }
}
2772 | |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
/**
 * Average two input rows and split every second byte into two planes:
 * dst0[i] = avg(src0[4*i],   src1[4*i])
 * dst1[i] = avg(src0[4*i+2], src1[4*i+2])   for i in [0, count).
 *
 * All pointers are first advanced to the end of their ranges and count is
 * negated, so the loops run with a negative index counting up to zero.
 *
 * @param src0  first input row, 4*count bytes are addressed
 * @param src1  second input row, 4*count bytes are addressed
 * @param dst0  output for the averaged bytes at offset 0 of each 4-byte group
 * @param dst1  output for the averaged bytes at offset 2 of each 4-byte group
 * @param count number of bytes written to each output
 */
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0 += count;
    dst1 += count;
    src0 += 4*count;
    src1 += 4*count;
    count= - count;
#ifdef PAVGB
    if(count <= -8){
        /* bias the index so the sign flag ends the loop while a full group
         * of 8 outputs per plane is still available */
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
            PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
            PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
            PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"
            "packuswb %%mm3, %%mm1 \n\t"
            MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
            /* the asm loads from src0/src1 and stores to dst0/dst1 through
             * plain register operands, so the compiler must be told memory
             * is accessed */
            : "memory"
        );
        count -= 7;
    }
#endif
    /* scalar path: everything without PAVGB, otherwise the last count%8 items.
     * NOTE(review): this truncates ((a+b)>>1); if PAVGB expands to an
     * instruction that rounds up, SIMD and scalar results can differ by one
     * -- confirm whether that is intended. */
    while(count<0){
        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
        count++;
    }
}
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2825 |
28962 | 2826 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
2827 { | |
2828 dst0+= count; | |
2829 dst1+= count; | |
2830 src += 4*count; | |
2831 count= - count; | |
2832 #if HAVE_MMX | |
2833 if(count <= -8){ | |
2834 count += 7; | |
2835 __asm__ volatile( | |
2836 "pcmpeqw %%mm7, %%mm7 \n\t" | |
2837 "psrlw $8, %%mm7 \n\t" | |
2838 "1: \n\t" | |
2839 "movq -28(%1, %0, 4), %%mm0 \n\t" | |
2840 "movq -20(%1, %0, 4), %%mm1 \n\t" | |
2841 "movq -12(%1, %0, 4), %%mm2 \n\t" | |
2842 "movq -4(%1, %0, 4), %%mm3 \n\t" | |
2843 "psrlw $8, %%mm0 \n\t" | |
2844 "psrlw $8, %%mm1 \n\t" | |
2845 "psrlw $8, %%mm2 \n\t" | |
2846 "psrlw $8, %%mm3 \n\t" | |
2847 "packuswb %%mm1, %%mm0 \n\t" | |
2848 "packuswb %%mm3, %%mm2 \n\t" | |
2849 "movq %%mm0, %%mm1 \n\t" | |
2850 "movq %%mm2, %%mm3 \n\t" | |
2851 "psrlw $8, %%mm0 \n\t" | |
2852 "psrlw $8, %%mm2 \n\t" | |
2853 "pand %%mm7, %%mm1 \n\t" | |
2854 "pand %%mm7, %%mm3 \n\t" | |
2855 "packuswb %%mm2, %%mm0 \n\t" | |
2856 "packuswb %%mm3, %%mm1 \n\t" | |
2857 MOVNTQ" %%mm0,- 7(%3, %0) \n\t" | |
2858 MOVNTQ" %%mm1,- 7(%2, %0) \n\t" | |
2859 "add $8, %0 \n\t" | |
2860 " js 1b \n\t" | |
2861 : "+r"(count) | |
2862 : "r"(src), "r"(dst0), "r"(dst1) | |
2863 ); | |
2864 count -= 7; | |
2865 } | |
2866 #endif | |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2867 src++; |
28962 | 2868 while(count<0){ |
2869 dst0[count]= src[4*count+0]; | |
2870 dst1[count]= src[4*count+2]; | |
2871 count++; | |
2872 } | |
2873 } | |
2874 | |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2875 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2876 { |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2877 dst0 += count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2878 dst1 += count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2879 src0 += 4*count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2880 src1 += 4*count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2881 count= - count; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2882 #ifdef PAVGB |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2883 if(count <= -8){ |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2884 count += 7; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2885 __asm__ volatile( |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2886 "pcmpeqw %%mm7, %%mm7 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2887 "psrlw $8, %%mm7 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2888 "1: \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2889 "movq -28(%1, %0, 4), %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2890 "movq -20(%1, %0, 4), %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2891 "movq -12(%1, %0, 4), %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2892 "movq -4(%1, %0, 4), %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2893 PAVGB" -28(%2, %0, 4), %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2894 PAVGB" -20(%2, %0, 4), %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2895 PAVGB" -12(%2, %0, 4), %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2896 PAVGB" - 4(%2, %0, 4), %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2897 "psrlw $8, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2898 "psrlw $8, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2899 "psrlw $8, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2900 "psrlw $8, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2901 "packuswb %%mm1, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2902 "packuswb %%mm3, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2903 "movq %%mm0, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2904 "movq %%mm2, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2905 "psrlw $8, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2906 "psrlw $8, %%mm2 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2907 "pand %%mm7, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2908 "pand %%mm7, %%mm3 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2909 "packuswb %%mm2, %%mm0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2910 "packuswb %%mm3, %%mm1 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2911 MOVNTQ" %%mm0,- 7(%4, %0) \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2912 MOVNTQ" %%mm1,- 7(%3, %0) \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2913 "add $8, %0 \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2914 " js 1b \n\t" |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2915 : "+r"(count) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2916 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2917 ); |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2918 count -= 7; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2919 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2920 #endif |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2921 src0++; |
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2922 src1++; |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2923 while(count<0){ |
28995
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2924 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
d50adcfcf99c
10l: C code of extract_even2avg(), extract_odd2() and extract_odd2avg() was
michael
parents:
28994
diff
changeset
|
2925 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2926 count++; |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2927 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2928 } |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2929 |
28962 | 2930 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, |
2931 long width, long height, | |
2932 long lumStride, long chromStride, long srcStride) | |
2933 { | |
2934 long y; | |
2935 const long chromWidth= -((-width)>>1); | |
2936 | |
2937 for (y=0; y<height; y++){ | |
2938 RENAME(extract_even)(src, ydst, width); | |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2939 if(y&1){ |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2940 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); |
28962 | 2941 udst+= chromStride; |
2942 vdst+= chromStride; | |
2943 } | |
2944 | |
2945 src += srcStride; | |
2946 ydst+= lumStride; | |
2947 } | |
2948 #if HAVE_MMX | |
2949 __asm__( | |
29480 | 2950 EMMS" \n\t" |
2951 SFENCE" \n\t" | |
2952 ::: "memory" | |
28962 | 2953 ); |
2954 #endif | |
2955 } | |
2956 | |
2957 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | |
2958 long width, long height, | |
2959 long lumStride, long chromStride, long srcStride) | |
2960 { | |
2961 long y; | |
2962 const long chromWidth= -((-width)>>1); | |
2963 | |
2964 for (y=0; y<height; y++){ | |
2965 RENAME(extract_even)(src, ydst, width); | |
2966 RENAME(extract_odd2)(src, udst, vdst, chromWidth); | |
2967 | |
2968 src += srcStride; | |
2969 ydst+= lumStride; | |
2970 udst+= chromStride; | |
2971 vdst+= chromStride; | |
2972 } | |
2973 #if HAVE_MMX | |
2974 __asm__( | |
29480 | 2975 EMMS" \n\t" |
2976 SFENCE" \n\t" | |
2977 ::: "memory" | |
28962 | 2978 ); |
2979 #endif | |
2980 } | |
2981 | |
2982 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | |
2983 long width, long height, | |
2984 long lumStride, long chromStride, long srcStride) | |
2985 { | |
2986 long y; | |
2987 const long chromWidth= -((-width)>>1); | |
2988 | |
2989 for (y=0; y<height; y++){ | |
2990 RENAME(extract_even)(src+1, ydst, width); | |
28994
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2991 if(y&1){ |
a03804d10dbf
Average chroma of 2 lines in packed 422 -> planar 420.
michael
parents:
28968
diff
changeset
|
2992 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); |
28962 | 2993 udst+= chromStride; |
2994 vdst+= chromStride; | |
2995 } | |
2996 | |
2997 src += srcStride; | |
2998 ydst+= lumStride; | |
2999 } | |
3000 #if HAVE_MMX | |
3001 __asm__( | |
29480 | 3002 EMMS" \n\t" |
3003 SFENCE" \n\t" | |
3004 ::: "memory" | |
28962 | 3005 ); |
3006 #endif | |
3007 } | |
3008 | |
3009 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, | |
3010 long width, long height, | |
3011 long lumStride, long chromStride, long srcStride) | |
3012 { | |
3013 long y; | |
3014 const long chromWidth= -((-width)>>1); | |
3015 | |
3016 for (y=0; y<height; y++){ | |
3017 RENAME(extract_even)(src+1, ydst, width); | |
3018 RENAME(extract_even2)(src, udst, vdst, chromWidth); | |
3019 | |
3020 src += srcStride; | |
3021 ydst+= lumStride; | |
3022 udst+= chromStride; | |
3023 vdst+= chromStride; | |
3024 } | |
3025 #if HAVE_MMX | |
3026 __asm__( | |
29480 | 3027 EMMS" \n\t" |
3028 SFENCE" \n\t" | |
3029 ::: "memory" | |
28962 | 3030 ); |
3031 #endif | |
3032 } | |
3033 | |
22960 | 3034 static inline void RENAME(rgb2rgb_init)(void){ |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3035 rgb15to16 = RENAME(rgb15to16); |
27486 | 3036 rgb15tobgr24 = RENAME(rgb15tobgr24); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3037 rgb15to32 = RENAME(rgb15to32); |
27486 | 3038 rgb16tobgr24 = RENAME(rgb16tobgr24); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3039 rgb16to32 = RENAME(rgb16to32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3040 rgb16to15 = RENAME(rgb16to15); |
27486 | 3041 rgb24tobgr16 = RENAME(rgb24tobgr16); |
3042 rgb24tobgr15 = RENAME(rgb24tobgr15); | |
3043 rgb24tobgr32 = RENAME(rgb24tobgr32); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3044 rgb32to16 = RENAME(rgb32to16); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3045 rgb32to15 = RENAME(rgb32to15); |
27486 | 3046 rgb32tobgr24 = RENAME(rgb32tobgr24); |
3047 rgb24to15 = RENAME(rgb24to15); | |
3048 rgb24to16 = RENAME(rgb24to16); | |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3049 rgb24tobgr24 = RENAME(rgb24tobgr24); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3050 rgb32tobgr32 = RENAME(rgb32tobgr32); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3051 rgb32tobgr16 = RENAME(rgb32tobgr16); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3052 rgb32tobgr15 = RENAME(rgb32tobgr15); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3053 yv12toyuy2 = RENAME(yv12toyuy2); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3054 yv12touyvy = RENAME(yv12touyvy); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3055 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); |
27495 | 3056 yuv422ptouyvy = RENAME(yuv422ptouyvy); |
23140
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3057 yuy2toyv12 = RENAME(yuy2toyv12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3058 // yvu9toyv12 = RENAME(yvu9toyv12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3059 planar2x = RENAME(planar2x); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3060 rgb24toyv12 = RENAME(rgb24toyv12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3061 interleaveBytes = RENAME(interleaveBytes); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3062 vu9_to_vu12 = RENAME(vu9_to_vu12); |
4d3870361b73
cosmetics attack, part I: Remove all tabs and prettyprint/reindent the code.
diego
parents:
23139
diff
changeset
|
3063 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); |
28962 | 3064 |
3065 uyvytoyuv420 = RENAME(uyvytoyuv420); | |
3066 uyvytoyuv422 = RENAME(uyvytoyuv422); | |
3067 yuyvtoyuv420 = RENAME(yuyvtoyuv420); | |
3068 yuyvtoyuv422 = RENAME(yuyvtoyuv422); | |
22960 | 3069 } |