Mercurial > mplayer.hg
comparison libswscale/x86/yuv2rgb_template2.c @ 31078:6502a6b24f9b
alternative LGPL-licensed, MMX-optimized YUV to RGB conversion routines
written by Kostya Shishkov
author | diego |
---|---|
date | Tue, 04 May 2010 09:11:01 +0000 |
parents | |
children | b3c85aa7adbf |
comparison
equal
deleted
inserted
replaced
31077:dd7f15a3fb1b | 31078:6502a6b24f9b |
---|---|
1 /* | |
2 * software YUV to RGB converter | |
3 * | |
4 * Copyright (C) 2001-2007 Michael Niedermayer | |
5 * (c) 2010 Konstantin Shishkov | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
/* Drop any earlier definitions so the three instruction macros below can be
 * (re)defined for the current CPU-feature configuration. */
24 #undef MOVNTQ | |
25 #undef EMMS | |
26 #undef SFENCE | |
27 | |
/* EMMS: instruction string issued by YUV2RGB_ENDFUNC after the row loop. */
28 #if HAVE_AMD3DNOW | |
29 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ | |
30 #define EMMS "femms" | |
31 #else | |
32 #define EMMS "emms" | |
33 #endif | |
34 | |
/* MOVNTQ: mnemonic used for the 8-byte stores in the RGB_PACK* macros.
 * SFENCE: string issued by YUV2RGB_ENDFUNC just before EMMS.
 * With HAVE_MMX2 they are "movntq" / "sfence"; otherwise a plain "movq"
 * store is used and SFENCE is only the placeholder text " # nop". */
35 #if HAVE_MMX2 | |
36 #define MOVNTQ "movntq" | |
37 #define SFENCE "sfence" | |
38 #else | |
39 #define MOVNTQ "movq" | |
40 #define SFENCE " # nop" | |
41 #endif | |
42 | |
/* MMX register numbers, as string fragments that the packing macros paste
 * after "%%mm". Blue = mm0, red = mm1 and green = mm2 are the registers in
 * which the YUV2RGB macro leaves its results; alpha = mm3 is filled by
 * SET_EMPTY_ALPHA or LOAD_ALPHA. */
43 #define REG_BLUE "0" | |
44 #define REG_RED "1" | |
45 #define REG_GREEN "2" | |
46 #define REG_ALPHA "3" | |
47 | |
/*
 * YUV2RGB_LOOP(depth): common prologue and start of the per-row loop.
 * depth is multiplied with the pixel count to compare against the
 * destination stride, i.e. it is the number of output bytes per pixel.
 *
 * Requires c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride, y and
 * h_size to be in scope at the point of use.
 *
 *  - h_size is c->dstW rounded up to a multiple of 8, then reduced by 8 if
 *    h_size * depth would exceed FFABS(dstStride[0]).
 *  - For PIX_FMT_YUV422P the two chroma strides are doubled in place; with
 *    the (y >> 1) chroma row index below this reads every second chroma row.
 *    NOTE(review): this writes through the srcStride parameter, so the
 *    caller's array is modified - confirm callers pass a scratch copy.
 *  - mm4 is cleared once here; the later asm statements rely on it still
 *    being zero.
 *  - Per row: image/py/pu/pv point at the row start in each plane and
 *    index starts at -h_size / 2 (it is counted up towards zero by
 *    YUV2RGB_ENDLOOP).
 *
 * The macro deliberately leaves the for-body open; the closing brace comes
 * from YUV2RGB_OPERANDS or YUV2RGB_OPERANDS_ALPHA.
 */
48 #define YUV2RGB_LOOP(depth) \ | |
49 h_size = (c->dstW + 7) & ~7; \ | |
50 if (h_size * depth > FFABS(dstStride[0])) \ | |
51 h_size -= 8; \ | |
52 \ | |
53 if (c->srcFormat == PIX_FMT_YUV422P) { \ | |
54 srcStride[1] *= 2; \ | |
55 srcStride[2] *= 2; \ | |
56 } \ | |
57 \ | |
58 __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ | |
59 for (y = 0; y < srcSliceH; y++) { \ | |
60 uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ | |
61 const uint8_t *py = src[0] + y * srcStride[0]; \ | |
62 const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ | |
63 const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ | |
64 x86_reg index = -h_size / 2; \ | |
65 | |
/*
 * YUV2RGB_INITIAL_LOAD: opens the per-row asm statement and loads the first
 * group of samples: 8 Y bytes into mm6 (movq), 4 U bytes into mm0 and 4 V
 * bytes into mm1 (movd). It then places the local label "1:" that
 * YUV2RGB_ENDLOOP branches back to.
 *
 * Operand numbers are those of YUV2RGB_OPERANDS: %0 = index,
 * %2 = pu - index, %3 = pv - index, %5 = py - 2*index.
 * The asm statement is left open; the instruction macros that follow append
 * string literals to it and YUV2RGB_OPERANDS* terminates it.
 */
66 #define YUV2RGB_INITIAL_LOAD \ | |
67 __asm__ volatile ( \ | |
68 "movq (%5, %0, 2), %%mm6\n\t" \ | |
69 "movd (%2, %0), %%mm0\n\t" \ | |
70 "movd (%3, %0), %%mm1\n\t" \ | |
71 "1: \n\t" \ | |
72 | |
73 /* YUV2RGB core | |
74 * Conversion is performed in usual way: | |
75 * R = Y' * Ycoef + Vred * V' | |
76 * G = Y' * Ycoef + Vgreen * V' + Ugreen * U' | |
77 * B = Y' * Ycoef + Ublue * U' | |
78 * | |
79 * where X' = X * 8 - Xoffset (multiplication is performed to increase | |
80 * precision a bit). | |
81 * Since it operates in YUV420 colorspace, Y component is additionally | |
82 * split into Y1 and Y2 for even and odd pixels. | |
83 * | |
84 * Input: | |
85 * mm0 - U (4 elems), mm1 - V (4 elems), mm6 - Y (8 elems), mm4 - zero register | |
86 * Output: | |
87 * mm1 - R, mm2 - G, mm0 - B | |
88 */ | |
/*
 * Additional notes on the macro below:
 *  - It is a sequence of asm string literals and must be used inside the
 *    asm statement opened by YUV2RGB_INITIAL_LOAD.
 *  - %4 is &c->redDither; the *_OFFSET and *_COEFF names are displacement
 *    strings relative to that address (their definitions are not in this
 *    file section).
 *  - Besides the output registers it overwrites mm3, mm5, mm6 and mm7.
 *  - Even pixels (Y1, mm6) and odd pixels (Y2, mm7) are converted
 *    separately, saturated to unsigned bytes with packuswb and then
 *    re-interleaved with punpcklbw, so each output register ends up holding
 *    8 bytes in pixel order.
 */
89 #define YUV2RGB \ | |
90 /* convert Y, U, V into Y1', Y2', U', V' */ \ | |
91 "movq %%mm6, %%mm7\n\t" \ | |
92 "punpcklbw %%mm4, %%mm0\n\t" \ | |
93 "punpcklbw %%mm4, %%mm1\n\t" \ | |
94 "pand "MANGLE(mmx_00ffw)", %%mm6\n\t" \ | |
95 "psrlw $8, %%mm7\n\t" \ | |
96 "psllw $3, %%mm0\n\t" \ | |
97 "psllw $3, %%mm1\n\t" \ | |
98 "psllw $3, %%mm6\n\t" \ | |
99 "psllw $3, %%mm7\n\t" \ | |
100 "psubsw "U_OFFSET"(%4), %%mm0\n\t" \ | |
101 "psubsw "V_OFFSET"(%4), %%mm1\n\t" \ | |
102 "psubw "Y_OFFSET"(%4), %%mm6\n\t" \ | |
103 "psubw "Y_OFFSET"(%4), %%mm7\n\t" \ | |
104 \ | |
105 /* multiply by coefficients */ \ | |
106 "movq %%mm0, %%mm2\n\t" \ | |
107 "movq %%mm1, %%mm3\n\t" \ | |
108 "pmulhw "UG_COEFF"(%4), %%mm2\n\t" \ | |
109 "pmulhw "VG_COEFF"(%4), %%mm3\n\t" \ | |
110 "pmulhw "Y_COEFF" (%4), %%mm6\n\t" \ | |
111 "pmulhw "Y_COEFF" (%4), %%mm7\n\t" \ | |
112 "pmulhw "UB_COEFF"(%4), %%mm0\n\t" \ | |
113 "pmulhw "VR_COEFF"(%4), %%mm1\n\t" \ | |
114 "paddsw %%mm3, %%mm2\n\t" \ | |
115 /* now: mm0 = UB, mm1 = VR, mm2 = CG */ \ | |
116 /* mm6 = Y1, mm7 = Y2 */ \ | |
117 \ | |
118 /* produce RGB */ \ | |
119 "movq %%mm7, %%mm3\n\t" \ | |
120 "movq %%mm7, %%mm5\n\t" \ | |
121 "paddsw %%mm0, %%mm3\n\t" \ | |
122 "paddsw %%mm1, %%mm5\n\t" \ | |
123 "paddsw %%mm2, %%mm7\n\t" \ | |
124 "paddsw %%mm6, %%mm0\n\t" \ | |
125 "paddsw %%mm6, %%mm1\n\t" \ | |
126 "paddsw %%mm6, %%mm2\n\t" \ | |
127 \ | |
128 /* pack and interleave even/odd pixels */ \ | |
129 "packuswb %%mm0, %%mm0\n\t" \ | |
130 "packuswb %%mm1, %%mm1\n\t" \ | |
131 "packuswb %%mm2, %%mm2\n\t" \ | |
132 "packuswb %%mm3, %%mm3\n\t" \ | |
133 "packuswb %%mm5, %%mm5\n\t" \ | |
134 "packuswb %%mm7, %%mm7\n\t" \ | |
135 "punpcklbw %%mm3, %%mm0\n\t" \ | |
136 "punpcklbw %%mm5, %%mm1\n\t" \ | |
137 "punpcklbw %%mm7, %%mm2\n\t" \ | |
138 | |
/*
 * YUV2RGB_ENDLOOP(depth): tail of the asm loop body.
 * Loads the next group of samples (8 Y bytes into mm6, 4 V bytes into mm1,
 * 4 U bytes into mm0), advances the destination pointer %1 by depth * 8
 * bytes and the index %0 by 4, then jumps back to label 1 while the index
 * is still negative ("js").
 *
 * NOTE(review): the loads are issued before the loop test, so the last
 * iteration of each row also reads the group following the last converted
 * one - presumably the source rows are padded accordingly; confirm.
 */
139 #define YUV2RGB_ENDLOOP(depth) \ | |
140 "movq 8 (%5, %0, 2), %%mm6\n\t" \ | |
141 "movd 4 (%3, %0), %%mm1\n\t" \ | |
142 "movd 4 (%2, %0), %%mm0\n\t" \ | |
143 "add $"AV_STRINGIFY(depth * 8)", %1\n\t" \ | |
144 "add $4, %0\n\t" \ | |
145 "js 1b\n\t" \ | |
146 | |
/*
 * YUV2RGB_OPERANDS: operand lists that terminate the asm statement opened
 * by YUV2RGB_INITIAL_LOAD, followed by the brace closing the for loop that
 * YUV2RGB_LOOP opened.
 *
 *   %0  index (read/write)      %1  image (read/write)
 *   %2  pu - index              %3  pv - index
 *   %4  &c->redDither           %5  py - 2*index
 *
 * The plane pointers are biased by the (negative) start index so that
 * (base, %0) resp. (base, %0, 2) addresses the row start on the first
 * iteration.
 * NOTE(review): no clobber list is given although the asm modifies MMX
 * registers and stores through %1 - confirm this is intentional.
 */
147 #define YUV2RGB_OPERANDS \ | |
148 : "+r" (index), "+r" (image) \ | |
149 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ | |
150 "r" (py - 2*index) \ | |
151 ); \ | |
152 } \ | |
153 | |
/*
 * YUV2RGB_OPERANDS_ALPHA: same as YUV2RGB_OPERANDS, plus a seventh operand
 * for the alpha plane. Requires a local pointer named pa to be in scope.
 *
 *   %0  index (read/write)      %1  image (read/write)
 *   %2  pu - index              %3  pv - index
 *   %4  &c->redDither           %5  py - 2*index
 *   %6  pa - 2*index            (read by LOAD_ALPHA)
 *
 * Also supplies the brace that closes the for loop opened by YUV2RGB_LOOP.
 */
154 #define YUV2RGB_OPERANDS_ALPHA \ | |
155 : "+r" (index), "+r" (image) \ | |
156 : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ | |
157 "r" (py - 2*index), "r" (pa - 2*index) \ | |
158 ); \ | |
159 } \ | |
160 | |
/*
 * YUV2RGB_ENDFUNC: common function epilogue. Issues the SFENCE string
 * followed by EMMS (both selected at the top of the file) in one asm
 * statement and returns srcSliceH from the enclosing function.
 */
161 #define YUV2RGB_ENDFUNC \ | |
162 __asm__ volatile (SFENCE"\n\t"EMMS); \ | |
163 return srcSliceH; \ | |
164 | |
165 | |
/*
 * RGB_PACK16(gmask, gshift, rshift): pack the 8 pixels held in mm0 (blue),
 * mm1 (red) and mm2 (green) into 16 bits per pixel and store the resulting
 * 16 bytes at (%1) and 8(%1) with MOVNTQ.
 *
 *   gmask  - name of the mask constant ANDed into the green bytes
 *   gshift - string literal: left shift applied to the green words
 *   rshift - string literal: left shift applied to the red words
 *
 * Blue and red are masked with mmx_redmask; blue is additionally shifted
 * right by 3. The bytes are widened to words using mm4 as the zero
 * register, shifted into place and ORed together.
 * Overwrites mm0, mm1, mm2, mm5, mm6 and mm7.
 */
166 #define RGB_PACK16(gmask, gshift, rshift) \ | |
167 "pand "MANGLE(mmx_redmask)", %%mm0\n\t" \ | |
168 "pand "MANGLE(mmx_redmask)", %%mm1\n\t" \ | |
169 "psrlw $3, %%mm0\n\t" \ | |
170 "pand "MANGLE(gmask)", %%mm2\n\t" \ | |
171 "movq %%mm0, %%mm5\n\t" \ | |
172 "movq %%mm1, %%mm6\n\t" \ | |
173 "movq %%mm2, %%mm7\n\t" \ | |
174 "punpcklbw %%mm4, %%mm0\n\t" \ | |
175 "punpcklbw %%mm4, %%mm1\n\t" \ | |
176 "punpcklbw %%mm4, %%mm2\n\t" \ | |
177 "punpckhbw %%mm4, %%mm5\n\t" \ | |
178 "punpckhbw %%mm4, %%mm6\n\t" \ | |
179 "punpckhbw %%mm4, %%mm7\n\t" \ | |
180 "psllw $"rshift", %%mm1\n\t" \ | |
181 "psllw $"rshift", %%mm6\n\t" \ | |
182 "psllw $"gshift", %%mm2\n\t" \ | |
183 "psllw $"gshift", %%mm7\n\t" \ | |
184 "por %%mm1, %%mm0\n\t" \ | |
185 "por %%mm6, %%mm5\n\t" \ | |
186 "por %%mm2, %%mm0\n\t" \ | |
187 "por %%mm7, %%mm5\n\t" \ | |
188 MOVNTQ " %%mm0, (%1)\n\t" \ | |
189 MOVNTQ " %%mm5, 8(%1)\n\t" \ | |
190 | |
/*
 * DITHER_RGB: add the per-row dither bytes to the converted pixels with
 * unsigned byte saturation (paddusb): blue dither to mm0, green dither to
 * mm2, red dither to mm1. BLUE_DITHER / GREEN_DITHER / RED_DITHER are
 * displacement strings relative to %4 (&c->redDither); their definitions
 * are not in this file section.
 */
191 #define DITHER_RGB \ | |
192 "paddusb "BLUE_DITHER"(%4), %%mm0\n\t" \ | |
193 "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ | |
194 "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ | |
195 | |
/**
 * MMX conversion of one slice of planar YUV to 16-bit packed pixels, built
 * from RGB_PACK16 with mmx_redmask as the green mask, a green shift of 2
 * and a red shift of 7.
 *
 * @param c         context; dstW, srcFormat and the fields addressed from
 *                  &c->redDither are used (dither fields are written when
 *                  DITHER1XBPP is defined)
 * @param src       source plane pointers: 0 = Y, 1 = U, 2 = V
 * @param srcStride per-plane strides; entries 1 and 2 are doubled in place
 *                  when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY row offset of the slice inside dst[0]
 * @param srcSliceH number of rows to convert
 * @param dst       destination pointers; only dst[0] is written
 * @param dstStride destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
196 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], | |
197 int srcStride[], | |
198 int srcSliceY, int srcSliceH, | |
199 uint8_t *dst[], int dstStride[]) | |
200 { | |
201 int y, h_size; | |
202 | |
203 YUV2RGB_LOOP(2) | |
204 | |
/* Runs once per row (inside the loop opened by YUV2RGB_LOOP): select the
 * dither values by row parity; red uses the opposite parity of blue/green. */
205 #ifdef DITHER1XBPP | |
206 c->blueDither = ff_dither8[y & 1]; | |
207 c->greenDither = ff_dither8[y & 1]; | |
208 c->redDither = ff_dither8[(y + 1) & 1]; | |
209 #endif | |
210 | |
211 YUV2RGB_INITIAL_LOAD | |
212 YUV2RGB | |
213 #ifdef DITHER1XBPP | |
214 DITHER_RGB | |
215 #endif | |
216 RGB_PACK16(mmx_redmask, "2", "7") | |
217 | |
218 YUV2RGB_ENDLOOP(2) | |
219 YUV2RGB_OPERANDS | |
220 YUV2RGB_ENDFUNC | |
221 } | |
222 | |
/**
 * MMX conversion of one slice of planar YUV to 16-bit packed pixels, built
 * from RGB_PACK16 with mmx_grnmask as the green mask, a green shift of 3
 * and a red shift of 8.
 *
 * @param c         context; dstW, srcFormat and the fields addressed from
 *                  &c->redDither are used (dither fields are written when
 *                  DITHER1XBPP is defined)
 * @param src       source plane pointers: 0 = Y, 1 = U, 2 = V
 * @param srcStride per-plane strides; entries 1 and 2 are doubled in place
 *                  when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY row offset of the slice inside dst[0]
 * @param srcSliceH number of rows to convert
 * @param dst       destination pointers; only dst[0] is written
 * @param dstStride destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
223 static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], | |
224 int srcStride[], | |
225 int srcSliceY, int srcSliceH, | |
226 uint8_t *dst[], int dstStride[]) | |
227 { | |
228 int y, h_size; | |
229 | |
230 YUV2RGB_LOOP(2) | |
231 | |
/* Runs once per row: dither values chosen by row parity. Green takes its
 * value from ff_dither4 here, blue and red from ff_dither8; red uses the
 * opposite parity. */
232 #ifdef DITHER1XBPP | |
233 c->blueDither = ff_dither8[y & 1]; | |
234 c->greenDither = ff_dither4[y & 1]; | |
235 c->redDither = ff_dither8[(y + 1) & 1]; | |
236 #endif | |
237 | |
238 YUV2RGB_INITIAL_LOAD | |
239 YUV2RGB | |
240 #ifdef DITHER1XBPP | |
241 DITHER_RGB | |
242 #endif | |
243 RGB_PACK16(mmx_grnmask, "3", "8") | |
244 | |
245 YUV2RGB_ENDLOOP(2) | |
246 YUV2RGB_OPERANDS | |
247 YUV2RGB_ENDFUNC | |
248 } | |
249 | |
250 | |
/*
 * RGB_PACK24(red, blue): interleave 8 converted pixels into 24 bytes and
 * store them with three MOVNTQ stores at (%1), 8(%1) and 16(%1).
 *
 *   red, blue - register-number strings (see REG_RED / REG_BLUE) naming the
 *               MMX registers to use for the two outer channels; green is
 *               always taken from mm2.
 *
 * Within each pixel the register passed as `blue` supplies the byte at the
 * lowest address, followed by green, then the register passed as `red`.
 * NOTE(review): yuv420_rgb24 invokes this as (REG_BLUE, REG_RED) and
 * yuv420_bgr24 as (REG_RED, REG_BLUE), so the parameter names describe the
 * BGR byte order rather than the caller's pixel-format name - confirm.
 *
 * Uses mm4 as the zero register; overwrites mm3, mm5, mm6 and mm7. The
 * registers named by red/blue and mm2 are only read.
 */
251 #define RGB_PACK24(red, blue) \ | |
252 /* generate first packed RGB octet */ \ | |
253 "movq %%mm2, %%mm5\n\t" \ | |
254 "movq %%mm"blue", %%mm6\n\t" \ | |
255 "movq %%mm"red", %%mm7\n\t" \ | |
256 "punpcklbw %%mm5, %%mm6\n\t" \ | |
257 "punpcklbw %%mm4, %%mm7\n\t" \ | |
258 "movq %%mm6, %%mm3\n\t" \ | |
259 "punpcklwd %%mm7, %%mm6\n\t" \ | |
260 "psrlq $32, %%mm3\n\t" \ | |
261 "movq %%mm6, %%mm5\n\t" \ | |
262 "psllq $40, %%mm6\n\t" \ | |
263 "psllq $48, %%mm3\n\t" \ | |
264 "psrlq $32, %%mm5\n\t" \ | |
265 "psrlq $40, %%mm6\n\t" \ | |
266 "psllq $24, %%mm5\n\t" \ | |
267 "por %%mm3, %%mm6\n\t" \ | |
268 "por %%mm5, %%mm6\n\t" \ | |
269 MOVNTQ " %%mm6, (%1)\n\t" \ | |
270 \ | |
271 /* generate second packed RGB octet */ \ | |
272 "movq %%mm"red", %%mm7\n\t" \ | |
273 "movq %%mm2, %%mm5\n\t" \ | |
274 "movq %%mm"blue", %%mm6\n\t" \ | |
275 "punpcklbw %%mm4, %%mm7\n\t" \ | |
276 "punpcklbw %%mm5, %%mm6\n\t" \ | |
277 "movq %%mm7, %%mm3\n\t" \ | |
278 "punpckhwd %%mm7, %%mm6\n\t" \ | |
279 "psllq $16, %%mm3\n\t" \ | |
280 "psrlq $32, %%mm6\n\t" \ | |
281 "psrlq $48, %%mm3\n\t" \ | |
282 "psllq $8, %%mm6\n\t" \ | |
283 "movq %%mm"red", %%mm7\n\t" \ | |
284 "por %%mm6, %%mm3\n\t" \ | |
285 "movq %%mm"blue", %%mm6\n\t" \ | |
286 "movq %%mm2, %%mm5\n\t" \ | |
287 "punpckhbw %%mm4, %%mm7\n\t" \ | |
288 "punpckhbw %%mm5, %%mm6\n\t" \ | |
289 "movq %%mm6, %%mm5\n\t" \ | |
290 "punpcklwd %%mm7, %%mm6\n\t" \ | |
291 "psrlq $16, %%mm5\n\t" \ | |
292 "psllq $56, %%mm5\n\t" \ | |
293 "por %%mm5, %%mm3\n\t" \ | |
294 "psllq $32, %%mm6\n\t" \ | |
295 "por %%mm6, %%mm3\n\t" \ | |
296 MOVNTQ " %%mm3, 8(%1)\n\t" \ | |
297 \ | |
298 /* generate third packed RGB octet */ \ | |
299 "movq %%mm"red", %%mm7\n\t" \ | |
300 "movq %%mm2, %%mm5\n\t" \ | |
301 "movq %%mm2, %%mm3\n\t" \ | |
302 "movq %%mm"blue", %%mm6\n\t" \ | |
303 "punpckhbw %%mm"red", %%mm3\n\t" \ | |
304 "punpckhbw %%mm4, %%mm7\n\t" \ | |
305 "psllq $32, %%mm3\n\t" \ | |
306 "punpckhbw %%mm5, %%mm6\n\t" \ | |
307 "psrlq $48, %%mm3\n\t" \ | |
308 "punpckhwd %%mm7, %%mm6\n\t" \ | |
309 "movq %%mm6, %%mm7\n\t" \ | |
310 "psrlq $32, %%mm6\n\t" \ | |
311 "psllq $32, %%mm7\n\t" \ | |
312 "psllq $40, %%mm6\n\t" \ | |
313 "psrlq $16, %%mm7\n\t" \ | |
314 "por %%mm6, %%mm3\n\t" \ | |
315 "por %%mm7, %%mm3\n\t" \ | |
316 MOVNTQ " %%mm3, 16(%1)\n\t" \ | |
317 | |
/**
 * MMX conversion of one slice of planar YUV to 24-bit packed pixels.
 * RGB_PACK24 is invoked with the red register (mm1) in its `blue` slot, so
 * within each pixel the red byte is stored at the lowest address, followed
 * by green and blue.
 *
 * @param c         context; dstW, srcFormat and the coefficient fields
 *                  addressed from &c->redDither are used
 * @param src       source plane pointers: 0 = Y, 1 = U, 2 = V
 * @param srcStride per-plane strides; entries 1 and 2 are doubled in place
 *                  when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY row offset of the slice inside dst[0]
 * @param srcSliceH number of rows to convert
 * @param dst       destination pointers; only dst[0] is written
 * @param dstStride destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
318 static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], | |
319 int srcStride[], | |
320 int srcSliceY, int srcSliceH, | |
321 uint8_t *dst[], int dstStride[]) | |
322 { | |
323 int y, h_size; | |
324 | |
325 YUV2RGB_LOOP(3) | |
326 | |
327 YUV2RGB_INITIAL_LOAD | |
328 YUV2RGB | |
329 RGB_PACK24(REG_BLUE, REG_RED) | |
330 | |
331 YUV2RGB_ENDLOOP(3) | |
332 YUV2RGB_OPERANDS | |
333 YUV2RGB_ENDFUNC | |
334 } | |
335 | |
/**
 * MMX conversion of one slice of planar YUV to 24-bit packed pixels.
 * RGB_PACK24 is invoked with the blue register (mm0) in its `blue` slot, so
 * within each pixel the blue byte is stored at the lowest address, followed
 * by green and red.
 *
 * @param c         context; dstW, srcFormat and the coefficient fields
 *                  addressed from &c->redDither are used
 * @param src       source plane pointers: 0 = Y, 1 = U, 2 = V
 * @param srcStride per-plane strides; entries 1 and 2 are doubled in place
 *                  when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY row offset of the slice inside dst[0]
 * @param srcSliceH number of rows to convert
 * @param dst       destination pointers; only dst[0] is written
 * @param dstStride destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
336 static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], | |
337 int srcStride[], | |
338 int srcSliceY, int srcSliceH, | |
339 uint8_t *dst[], int dstStride[]) | |
340 { | |
341 int y, h_size; | |
342 | |
343 YUV2RGB_LOOP(3) | |
344 | |
345 YUV2RGB_INITIAL_LOAD | |
346 YUV2RGB | |
347 RGB_PACK24(REG_RED, REG_BLUE) | |
348 | |
349 YUV2RGB_ENDLOOP(3) | |
350 YUV2RGB_OPERANDS | |
351 YUV2RGB_ENDFUNC | |
352 } | |
353 | |
354 | |
/*
 * SET_EMPTY_ALPHA: for sources without an alpha plane. Comparing the alpha
 * register with itself sets every bit, i.e. all eight alpha bytes are 0xFF.
 */
355 #define SET_EMPTY_ALPHA \ | |
356 "pcmpeqd %%mm"REG_ALPHA", %%mm"REG_ALPHA"\n\t" /* set alpha to 0xFF */ \ | |
357 | |
/*
 * LOAD_ALPHA: load 8 alpha bytes for the current pixel group into the alpha
 * register from (%6, %0, 2). Operand %6 only exists when the asm statement
 * is closed with YUV2RGB_OPERANDS_ALPHA (it is pa - 2*index there).
 */
358 #define LOAD_ALPHA \ | |
359 "movq (%6, %0, 2), %%mm"REG_ALPHA"\n\t" \ | |
360 | |
/*
 * RGB_PACK32(red, green, blue, alpha): interleave four registers of 8 bytes
 * each into 8 pixels of 4 bytes and store the 32 bytes with four MOVNTQ
 * stores at 0(%1), 8(%1), 16(%1) and 24(%1).
 *
 * All arguments are register-number strings (see the REG_* macros). Within
 * each pixel the bytes are stored in the order blue, green, red, alpha as
 * named by the macro parameters; callers choose the actual channel order by
 * the registers they pass.
 *
 * Overwrites all four argument registers as well as mm5 and mm6.
 */
361 #define RGB_PACK32(red, green, blue, alpha) \ | |
362 "movq %%mm"blue", %%mm5\n\t" \ | |
363 "movq %%mm"red", %%mm6\n\t" \ | |
364 "punpckhbw %%mm"green", %%mm5\n\t" \ | |
365 "punpcklbw %%mm"green", %%mm"blue"\n\t" \ | |
366 "punpckhbw %%mm"alpha", %%mm6\n\t" \ | |
367 "punpcklbw %%mm"alpha", %%mm"red"\n\t" \ | |
368 "movq %%mm"blue", %%mm"green"\n\t" \ | |
369 "movq %%mm5, %%mm"alpha"\n\t" \ | |
370 "punpcklwd %%mm"red", %%mm"blue"\n\t" \ | |
371 "punpckhwd %%mm"red", %%mm"green"\n\t" \ | |
372 "punpcklwd %%mm6, %%mm5\n\t" \ | |
373 "punpckhwd %%mm6, %%mm"alpha"\n\t" \ | |
374 MOVNTQ " %%mm"blue", 0(%1)\n\t" \ | |
375 MOVNTQ " %%mm"green", 8(%1)\n\t" \ | |
376 MOVNTQ " %%mm5, 16(%1)\n\t" \ | |
377 MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ | |
378 | |
/**
 * MMX conversion of one slice of planar YUV to 32-bit packed pixels with
 * the alpha byte forced to 0xFF (SET_EMPTY_ALPHA). RGB_PACK32 receives the
 * channel registers in their natural slots, so each pixel is stored as the
 * bytes blue, green, red, alpha.
 *
 * @param c         context; dstW, srcFormat and the coefficient fields
 *                  addressed from &c->redDither are used
 * @param src       source plane pointers: 0 = Y, 1 = U, 2 = V
 * @param srcStride per-plane strides; entries 1 and 2 are doubled in place
 *                  when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY row offset of the slice inside dst[0]
 * @param srcSliceH number of rows to convert
 * @param dst       destination pointers; only dst[0] is written
 * @param dstStride destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
379 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], | |
380 int srcStride[], | |
381 int srcSliceY, int srcSliceH, | |
382 uint8_t *dst[], int dstStride[]) | |
383 { | |
384 int y, h_size; | |
385 | |
386 YUV2RGB_LOOP(4) | |
387 | |
388 YUV2RGB_INITIAL_LOAD | |
389 YUV2RGB | |
390 SET_EMPTY_ALPHA | |
391 RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) | |
392 | |
393 YUV2RGB_ENDLOOP(4) | |
394 YUV2RGB_OPERANDS | |
395 YUV2RGB_ENDFUNC | |
396 } | |
397 | |
398 static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], | |
399 int srcStride[], | |
400 int srcSliceY, int srcSliceH, | |
401 uint8_t *dst[], int dstStride[]) | |
402 { | |
403 #if HAVE_7REGS | |
404 int y, h_size; | |
405 | |
406 YUV2RGB_LOOP(4) | |
407 | |
408 const uint8_t *pa = src[3] + y * srcStride[3]; | |
409 YUV2RGB_INITIAL_LOAD | |
410 YUV2RGB | |
411 LOAD_ALPHA | |
412 RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) | |
413 | |
414 YUV2RGB_ENDLOOP(4) | |
415 YUV2RGB_OPERANDS_ALPHA | |
416 YUV2RGB_ENDFUNC | |
417 #endif | |
418 } | |
419 | |
/**
 * MMX conversion of one slice of planar YUV to 32-bit packed pixels with
 * the alpha byte forced to 0xFF (SET_EMPTY_ALPHA). RGB_PACK32 receives the
 * red and blue registers in swapped slots, so each pixel is stored as the
 * bytes red, green, blue, alpha.
 *
 * @param c         context; dstW, srcFormat and the coefficient fields
 *                  addressed from &c->redDither are used
 * @param src       source plane pointers: 0 = Y, 1 = U, 2 = V
 * @param srcStride per-plane strides; entries 1 and 2 are doubled in place
 *                  when c->srcFormat is PIX_FMT_YUV422P
 * @param srcSliceY row offset of the slice inside dst[0]
 * @param srcSliceH number of rows to convert
 * @param dst       destination pointers; only dst[0] is written
 * @param dstStride destination strides; only dstStride[0] is used
 * @return srcSliceH
 */
420 static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], | |
421 int srcStride[], | |
422 int srcSliceY, int srcSliceH, | |
423 uint8_t *dst[], int dstStride[]) | |
424 { | |
425 int y, h_size; | |
426 | |
427 YUV2RGB_LOOP(4) | |
428 | |
429 YUV2RGB_INITIAL_LOAD | |
430 YUV2RGB | |
431 SET_EMPTY_ALPHA | |
432 RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) | |
433 | |
434 YUV2RGB_ENDLOOP(4) | |
435 YUV2RGB_OPERANDS | |
436 YUV2RGB_ENDFUNC | |
437 } | |
438 | |
439 static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], | |
440 int srcStride[], | |
441 int srcSliceY, int srcSliceH, | |
442 uint8_t *dst[], int dstStride[]) | |
443 { | |
444 #if HAVE_7REGS | |
445 int y, h_size; | |
446 | |
447 YUV2RGB_LOOP(4) | |
448 | |
449 const uint8_t *pa = src[3] + y * srcStride[3]; | |
450 YUV2RGB_INITIAL_LOAD | |
451 YUV2RGB | |
452 LOAD_ALPHA | |
453 RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) | |
454 | |
455 YUV2RGB_ENDLOOP(4) | |
456 YUV2RGB_OPERANDS_ALPHA | |
457 YUV2RGB_ENDFUNC | |
458 #endif | |
459 } |