annotate postproc/yuv2rgb_template.c @ 18715:30d7ddf08889

Fix window position when changing videos while in fullscreen and for window managers that modify position on Map. Oked by Alexander Strasser.
author reimar
date Thu, 15 Jun 2006 08:00:37 +0000
parents 821f464b4d90
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
1
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
2 /*
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
3 * yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology"
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
4 *
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
5 * Copyright (C) 2000, Silicon Integrated System Corp.
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
6 * All Rights Reserved.
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
7 *
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
8 * Author: Olie Lho <ollie@sis.com.tw>
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
9 *
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
10 * This file is part of mpeg2dec, a free MPEG-2 video decoder
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
11 *
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
12 * mpeg2dec is free software; you can redistribute it and/or modify
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
13 * it under the terms of the GNU General Public License as published by
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
14 * the Free Software Foundation; either version 2, or (at your option)
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
15 * any later version.
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
16 *
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
17 * mpeg2dec is distributed in the hope that it will be useful,
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
20 * GNU General Public License for more details.
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
21 *
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
22 * You should have received a copy of the GNU General Public License
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
23 * along with GNU Make; see the file COPYING. If not, write to
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
24 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
25 *
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
26 * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
3143
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
27 * MMX/MMX2 Template stuff from Michael Niedermayer (needed for fast movntq support)
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
28 * context / deglobalize stuff by Michael Niedermayer
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
29 */
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
30
3143
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
31 #undef MOVNTQ
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
32 #undef EMMS
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
33 #undef SFENCE
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
34
3143
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
35 #ifdef HAVE_3DNOW
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
36 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
37 #define EMMS "femms"
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
38 #else
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
39 #define EMMS "emms"
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
40 #endif
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
41
3143
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
42 #ifdef HAVE_MMX2
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
43 #define MOVNTQ "movntq"
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
44 #define SFENCE "sfence"
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
45 #else
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
46 #define MOVNTQ "movq"
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
47 #define SFENCE "/nop"
86910f54c391 runtime cpu detection
michael
parents: 2749
diff changeset
48 #endif
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
49
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
50 #define YUV2RGB \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
51 /* Do the multiply part of the conversion for even and odd pixels,
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
52 register usage:
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
53 mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
54 mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
55 mm6 -> Y even, mm7 -> Y odd */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
56 /* convert the chroma part */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
57 "punpcklbw %%mm4, %%mm0;" /* scatter 4 Cb 00 u3 00 u2 00 u1 00 u0 */ \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
58 "punpcklbw %%mm4, %%mm1;" /* scatter 4 Cr 00 v3 00 v2 00 v1 00 v0 */ \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
59 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
60 "psllw $3, %%mm0;" /* Promote precision */ \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
61 "psllw $3, %%mm1;" /* Promote precision */ \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
62 \
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
63 "psubsw "U_OFFSET"(%4), %%mm0;" /* Cb -= 128 */ \
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
64 "psubsw "V_OFFSET"(%4), %%mm1;" /* Cr -= 128 */ \
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
65 \
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
66 "movq %%mm0, %%mm2;" /* Copy 4 Cb 00 u3 00 u2 00 u1 00 u0 */ \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
67 "movq %%mm1, %%mm3;" /* Copy 4 Cr 00 v3 00 v2 00 v1 00 v0 */ \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
68 \
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
69 "pmulhw "UG_COEFF"(%4), %%mm2;" /* Mul Cb with green coeff -> Cb green */ \
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
70 "pmulhw "VG_COEFF"(%4), %%mm3;" /* Mul Cr with green coeff -> Cr green */ \
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
71 \
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
72 "pmulhw "UB_COEFF"(%4), %%mm0;" /* Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 */\
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
73 "pmulhw "VR_COEFF"(%4), %%mm1;" /* Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 */\
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
74 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
75 "paddsw %%mm3, %%mm2;" /* Cb green + Cr green -> Cgreen */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
76 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
77 /* convert the luma part */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
78 "movq %%mm6, %%mm7;" /* Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */\
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
79 "pand "MANGLE(mmx_00ffw)", %%mm6;" /* get Y even 00 Y6 00 Y4 00 Y2 00 Y0 */\
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
80 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
81 "psrlw $8, %%mm7;" /* get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
82 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
83 "psllw $3, %%mm6;" /* Promote precision */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
84 "psllw $3, %%mm7;" /* Promote precision */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
85 \
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
86 "psubw "Y_OFFSET"(%4), %%mm6;" /* Y -= 16 */\
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
87 "psubw "Y_OFFSET"(%4), %%mm7;" /* Y -= 16 */\
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
88 \
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
89 "pmulhw "Y_COEFF"(%4), %%mm6;" /* Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 */\
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
90 "pmulhw "Y_COEFF"(%4), %%mm7;" /* Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 */\
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
91 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
92 /* Do the addition part of the conversion for even and odd pixels,
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
93 register usage:
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
94 mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
95 mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
96 mm6 -> Y even, mm7 -> Y odd */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
97 "movq %%mm0, %%mm3;" /* Copy Cblue */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
98 "movq %%mm1, %%mm4;" /* Copy Cred */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
99 "movq %%mm2, %%mm5;" /* Copy Cgreen */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
100 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
101 "paddsw %%mm6, %%mm0;" /* Y even + Cblue 00 B6 00 B4 00 B2 00 B0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
102 "paddsw %%mm7, %%mm3;" /* Y odd + Cblue 00 B7 00 B5 00 B3 00 B1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
103 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
104 "paddsw %%mm6, %%mm1;" /* Y even + Cred 00 R6 00 R4 00 R2 00 R0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
105 "paddsw %%mm7, %%mm4;" /* Y odd + Cred 00 R7 00 R5 00 R3 00 R1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
106 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
107 "paddsw %%mm6, %%mm2;" /* Y even + Cgreen 00 G6 00 G4 00 G2 00 G0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
108 "paddsw %%mm7, %%mm5;" /* Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
109 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
110 /* Limit RGB even to 0..255 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
111 "packuswb %%mm0, %%mm0;" /* B6 B4 B2 B0 B6 B4 B2 B0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
112 "packuswb %%mm1, %%mm1;" /* R6 R4 R2 R0 R6 R4 R2 R0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
113 "packuswb %%mm2, %%mm2;" /* G6 G4 G2 G0 G6 G4 G2 G0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
114 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
115 /* Limit RGB odd to 0..255 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
116 "packuswb %%mm3, %%mm3;" /* B7 B5 B3 B1 B7 B5 B3 B1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
117 "packuswb %%mm4, %%mm4;" /* R7 R5 R3 R1 R7 R5 R3 R1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
118 "packuswb %%mm5, %%mm5;" /* G7 G5 G3 G1 G7 G5 G3 G1 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
119 \
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
120 /* Interleave RGB even and odd */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
121 "punpcklbw %%mm3, %%mm0;" /* B7 B6 B5 B4 B3 B2 B1 B0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
122 "punpcklbw %%mm4, %%mm1;" /* R7 R6 R5 R4 R3 R2 R1 R0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
123 "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
124
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
125
9499
bc5b87370cd1 cleanup
michael
parents: 9494
diff changeset
126 static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
127 int srcSliceH, uint8_t* dst[], int dstStride[]){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
128 int y, h_size;
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
129
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
130 if(c->srcFormat == IMGFMT_422P){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
131 srcStride[1] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
132 srcStride[2] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
133 }
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
134
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
135 h_size= (c->dstW+7)&~7;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
136 if(h_size*2 > dstStride[0]) h_size-=8;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
137
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
138 __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
139 //printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
140 //srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
141 for (y= 0; y<srcSliceH; y++ ) {
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
142 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
143 uint8_t *_py = src[0] + y*srcStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
144 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
145 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
146 long index= -h_size/2;
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
147
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
148 b5Dither= dither8[y&1];
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
149 g6Dither= dither4[y&1];
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
150 g5Dither= dither8[y&1];
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
151 r5Dither= dither8[(y+1)&1];
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
152 /* this mmx assembly code deals with a SINGLE scan line at a time; it converts 8
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
153 pixels in each iteration */
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
154 __asm__ __volatile__ (
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
155 /* load data for start of next scan line */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
156 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
157 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
158 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
159 // ".balign 16 \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
160 "1: \n\t"
2736
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
161 /* no speed difference on my p3@500 with prefetch,
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
162 * if it is faster for anyone with -benchmark then tell me
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
163 PREFETCH" 64(%0) \n\t"
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
164 PREFETCH" 64(%1) \n\t"
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
165 PREFETCH" 64(%2) \n\t"
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
166 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
167 YUV2RGB
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
168
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
169 #ifdef DITHER1XBPP
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
170 "paddusb "MANGLE(b5Dither)", %%mm0;"
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
171 "paddusb "MANGLE(g6Dither)", %%mm2;"
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
172 "paddusb "MANGLE(r5Dither)", %%mm1;"
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
173 #endif
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
174 /* mask unneeded bits off */
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
175 "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
176 "pand "MANGLE(mmx_grnmask)", %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
177 "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
178
2736
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
179 "psrlw $3,%%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
180 "pxor %%mm4, %%mm4;" /* zero mm4 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
181
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
182 "movq %%mm0, %%mm5;" /* Copy B7-B0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
183 "movq %%mm2, %%mm7;" /* Copy G7-G0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
184
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
185 /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
186 "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
187 "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
188
2736
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
189 "psllw $3, %%mm2;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
190 "por %%mm2, %%mm0;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
191
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
192 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
193 MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
194
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
195 /* convert rgb24 plane to rgb16 pack for pixel 4-7 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
196 "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
197 "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
198
2736
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
199 "psllw $3, %%mm7;" /* 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
200 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
201
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
202 "por %%mm7, %%mm5;" /* r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
203 "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
204
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
205 MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
206
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
207 "add $16, %1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
208 "add $4, %0 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
209 " js 1b \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
210
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
211 : "+r" (index), "+r" (_image)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
212 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
213 );
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
214 }
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
215
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
216 __asm__ __volatile__ (EMMS);
9494
543ab3909b78 sws_ prefix, more seperation between internal & external swscaler API
michael
parents: 9476
diff changeset
217
543ab3909b78 sws_ prefix, more seperation between internal & external swscaler API
michael
parents: 9476
diff changeset
218 return srcSliceH;
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
219 }
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
220
9499
bc5b87370cd1 cleanup
michael
parents: 9494
diff changeset
221 static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
222 int srcSliceH, uint8_t* dst[], int dstStride[]){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
223 int y, h_size;
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
224
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
225 if(c->srcFormat == IMGFMT_422P){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
226 srcStride[1] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
227 srcStride[2] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
228 }
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
229
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
230 h_size= (c->dstW+7)&~7;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
231 if(h_size*2 > dstStride[0]) h_size-=8;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
232
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
233 __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
234 //printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
235 //srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
236 for (y= 0; y<srcSliceH; y++ ) {
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
237 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
238 uint8_t *_py = src[0] + y*srcStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
239 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
240 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
241 long index= -h_size/2;
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
242
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
243 b5Dither= dither8[y&1];
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
244 g6Dither= dither4[y&1];
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
245 g5Dither= dither8[y&1];
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
246 r5Dither= dither8[(y+1)&1];
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
247 /* this mmx assembly code deals with a SINGLE scan line at a time; it converts 8
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
248 pixels in each iteration */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
249 __asm__ __volatile__ (
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
250 /* load data for start of next scan line */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
251 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
252 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
253 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
254 // ".balign 16 \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
255 "1: \n\t"
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
256 YUV2RGB
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
257
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
258 #ifdef DITHER1XBPP
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
259 "paddusb "MANGLE(b5Dither)", %%mm0 \n\t"
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
260 "paddusb "MANGLE(g5Dither)", %%mm2 \n\t"
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
261 "paddusb "MANGLE(r5Dither)", %%mm1 \n\t"
2749
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
262 #endif
2cbecedb2616 15/16 bpp dithering
michael
parents: 2739
diff changeset
263
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
264 /* mask unneeded bits off */
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
265 "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
266 "pand "MANGLE(mmx_redmask)", %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
267 "pand "MANGLE(mmx_redmask)", %%mm1;" /* r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0 */
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
268
2736
aeef41a19f4b 16bpp is 10% faster
michael
parents: 2735
diff changeset
269 "psrlw $3,%%mm0;" /* 0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3 */
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
270 "psrlw $1,%%mm1;" /* 0_r7r6r5 r4r3_0_0 0_r7r6r5 r4r3_0_0 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
271 "pxor %%mm4, %%mm4;" /* zero mm4 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
272
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
273 "movq %%mm0, %%mm5;" /* Copy B7-B0 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
274 "movq %%mm2, %%mm7;" /* Copy G7-G0 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
275
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
276 /* convert rgb24 plane to rgb15 pack for pixel 0-3 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
277 "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
278 "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
279
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
280 "psllw $2, %%mm2;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
281 "por %%mm2, %%mm0;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
282
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
283 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
284 MOVNTQ " %%mm0, (%1);" /* store pixel 0-3 */
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
285
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
286 /* convert rgb24 plane to rgb15 pack for pixel 4-7 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
287 "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 0_g7g6g5 g4g3_0_0 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
288 "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
289
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
290 "psllw $2, %%mm7;" /* 0_0_0_0 0_0_g7g6 g5g4g3_0 0_0_0_0 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
291 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
292
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
293 "por %%mm7, %%mm5;" /* 0_r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
294 "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
295
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
296 MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
297
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
298 "add $16, %1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
299 "add $4, %0 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
300 " js 1b \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
301 : "+r" (index), "+r" (_image)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
302 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
303 );
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
304 }
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
305
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
306 __asm__ __volatile__ (EMMS);
9494
543ab3909b78 sws_ prefix, more seperation between internal & external swscaler API
michael
parents: 9476
diff changeset
307 return srcSliceH;
2735
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
308 }
0f4c3ae92857 15 bpp support
michael
parents: 2734
diff changeset
309
9499
bc5b87370cd1 cleanup
michael
parents: 9494
diff changeset
/*
 * yuv420_rgb24: convert one horizontal slice of planar YUV input to packed
 * 24-bit output, one scan line per outer-loop iteration.
 *
 *   c          - scaler context; srcFormat, dstW and redDither are read here.
 *   src        - plane base pointers: src[0]=Y, src[1]=U (Cb), src[2]=V (Cr).
 *   srcStride  - per-plane row strides. NOTE(review): entries 1 and 2 are
 *                doubled IN PLACE when srcFormat is IMGFMT_422P, so the
 *                caller's array is modified -- confirm callers expect this.
 *   srcSliceY  - row offset of the slice in the destination (added to y for
 *                the dst address only; the source is indexed from row 0).
 *   srcSliceH  - number of rows to convert.
 *   dst, dstStride - destination base pointer / row stride; only index 0 is
 *                used.
 *
 * Returns srcSliceH.
 *
 * Each inner-loop pass consumes 8 Y, 4 Cb and 4 Cr bytes and stores 24 output
 * bytes (three 8-byte MOVNTQ stores). The colour math itself lives in the
 * YUV2RGB macro, which is defined outside this function.
 * NOTE(review): the loads for the following pass are issued before the loop
 * branch, so the last pass of a row also reads one 8-pixel group past the
 * converted width.
 */
310 static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
311 int srcSliceH, uint8_t* dst[], int dstStride[]){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
312 int y, h_size;
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
313
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
/* With the chroma strides doubled, (y>>1)*srcStride[] below addresses chroma
   row 2*(y>>1), i.e. only every second chroma row of the input is used. */
314 if(c->srcFormat == IMGFMT_422P){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
315 srcStride[1] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
316 srcStride[2] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
317 }
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
318
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
/* Round the row width up to a multiple of 8 pixels (one asm pass); step back
   by 8 when that many 3-byte pixels would not fit in dstStride[0] bytes. */
319 h_size= (c->dstW+7)&~7;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
320 if(h_size*3 > dstStride[0]) h_size-=8;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
321
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
322 __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
323
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
324 for (y= 0; y<srcSliceH; y++ ) {
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
325 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
326 uint8_t *_py = src[0] + y*srcStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
/* y>>1: one chroma row serves two consecutive luma rows */
327 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
328 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
/* negative count of chroma bytes in the row; the asm loop adds 4 per pass
   and leaves once the value is no longer negative */
329 long index= -h_size/2;
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
330
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
331 /* this mmx assembly code deals with SINGLE scan line at a time, it converts 8
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
332 pixels in each iteration */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
333 __asm__ __volatile__ (
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
334 /* load data for start of next scan line */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
335 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
336 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
337 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
338 // ".balign 16 \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
339 "1: \n\t"
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
340 YUV2RGB
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
341 /* mm0=B, %%mm2=G, %%mm1=R */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
/* MMX2 path: build the three output quadwords with pshufw + mask (M24A/M24B/M24C) */
342 #ifdef HAVE_MMX2
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
343 "movq "MANGLE(M24A)", %%mm4 \n\t"
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
344 "movq "MANGLE(M24C)", %%mm7 \n\t"
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
345 "pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
346 "pshufw $0x50, %%mm2, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
347 "pshufw $0x00, %%mm1, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
348
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
349 "pand %%mm4, %%mm5 \n\t" /* B2 B1 B0 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
350 "pand %%mm4, %%mm3 \n\t" /* G2 G1 G0 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
351 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
352
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
353 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
354 "por %%mm5, %%mm6 \n\t"
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
355 "por %%mm3, %%mm6 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
356 MOVNTQ" %%mm6, (%1) \n\t"
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
357
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
358 "psrlq $8, %%mm2 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
359 "pshufw $0xA5, %%mm0, %%mm5 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
360 "pshufw $0x55, %%mm2, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
361 "pshufw $0xA5, %%mm1, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
362
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
363 "pand "MANGLE(M24B)", %%mm5 \n\t" /* B5 B4 B3 */
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
364 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
365 "pand %%mm4, %%mm6 \n\t" /* R4 R3 R2 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
366
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
367 "por %%mm5, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
368 "por %%mm3, %%mm6 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
369 MOVNTQ" %%mm6, 8(%1) \n\t"
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
370
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
371 "pshufw $0xFF, %%mm0, %%mm5 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
372 "pshufw $0xFA, %%mm2, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
373 "pshufw $0xFA, %%mm1, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
374 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
375
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
376 "pand %%mm7, %%mm5 \n\t" /* B7 B6 */
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
377 "pand %%mm4, %%mm3 \n\t" /* G7 G6 G5 */
4285
20806e535b96 add mangling
atmos4
parents: 3143
diff changeset
378 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
379 "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
380 \
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
381 "por %%mm5, %%mm3 \n\t"
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
382 "por %%mm3, %%mm6 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
383 MOVNTQ" %%mm6, 16(%1) \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
384 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
385 "pxor %%mm4, %%mm4 \n\t"
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
386
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
387 #else
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
388
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
/* plain MMX path: interleave to four 0RGB pairs, then shift/merge into three quadwords */
389 "pxor %%mm4, %%mm4 \n\t"
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
390 "movq %%mm0, %%mm5 \n\t" /* B */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
391 "movq %%mm1, %%mm6 \n\t" /* R */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
392 "punpcklbw %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
393 "punpcklbw %%mm4, %%mm1 \n\t" /* 0R0R0R0R 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
394 "punpckhbw %%mm2, %%mm5 \n\t" /* GBGBGBGB 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
395 "punpckhbw %%mm4, %%mm6 \n\t" /* 0R0R0R0R 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
396 "movq %%mm0, %%mm7 \n\t" /* GBGBGBGB 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
397 "movq %%mm5, %%mm3 \n\t" /* GBGBGBGB 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
398 "punpcklwd %%mm1, %%mm7 \n\t" /* 0RGB0RGB 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
399 "punpckhwd %%mm1, %%mm0 \n\t" /* 0RGB0RGB 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
400 "punpcklwd %%mm6, %%mm5 \n\t" /* 0RGB0RGB 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
401 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
402
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
403 "movq %%mm7, %%mm2 \n\t" /* 0RGB0RGB 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
404 "movq %%mm0, %%mm6 \n\t" /* 0RGB0RGB 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
405 "movq %%mm5, %%mm1 \n\t" /* 0RGB0RGB 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
406 "movq %%mm3, %%mm4 \n\t" /* 0RGB0RGB 3 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
407
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
408 "psllq $40, %%mm7 \n\t" /* RGB00000 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
409 "psllq $40, %%mm0 \n\t" /* RGB00000 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
410 "psllq $40, %%mm5 \n\t" /* RGB00000 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
411 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
412
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
413 "punpckhdq %%mm2, %%mm7 \n\t" /* 0RGBRGB0 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
414 "punpckhdq %%mm6, %%mm0 \n\t" /* 0RGBRGB0 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
415 "punpckhdq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
416 "punpckhdq %%mm4, %%mm3 \n\t" /* 0RGBRGB0 3 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
417
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
418 "psrlq $8, %%mm7 \n\t" /* 00RGBRGB 0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
419 "movq %%mm0, %%mm6 \n\t" /* 0RGBRGB0 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
420 "psllq $40, %%mm0 \n\t" /* GB000000 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
421 "por %%mm0, %%mm7 \n\t" /* GBRGBRGB 0 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
422 MOVNTQ" %%mm7, (%1) \n\t"
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
423
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
424 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
425
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
426 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
427 "movq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
428 "psllq $24, %%mm5 \n\t" /* BRGB0000 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
429 "por %%mm5, %%mm6 \n\t" /* BRGBRGBR 1 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
430 MOVNTQ" %%mm6, 8(%1) \n\t"
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
431
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
432 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
433
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
434 "psrlq $40, %%mm1 \n\t" /* 000000RG 2 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
435 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
436 "por %%mm3, %%mm1 \n\t" /* RGBRGBRG 2 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
437 MOVNTQ" %%mm1, 16(%1) \n\t"
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
438
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
439 "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
440 "pxor %%mm4, %%mm4 \n\t"
2739
f365024806c0 mmx2 bgr24 stuff from swscale (slightly faster)
michael
parents: 2737
diff changeset
441 #endif
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
442
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
/* advance by one 8-pixel group: 24 output bytes, 4 chroma bytes; js loops
   while index is still negative */
443 "add $24, %1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
444 "add $4, %0 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
445 " js 1b \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
446
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
/* outputs: %0 = index, %1 = _image (both read and written) */
447 : "+r" (index), "+r" (_image)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
/* inputs: %2/%3 = U/V row pointers biased by -index and %5 = Y row pointer
   biased by -2*index, so (base, index) addressing starts at the row's first
   byte. %4 = &c->redDither is not named in the asm text visible here --
   presumably consumed by YUV2RGB; confirm.
   NOTE(review): no clobber list (mm registers, "memory") is declared. */
448 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
449 );
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
450 }
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
451
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
452 __asm__ __volatile__ (EMMS);
9494
543ab3909b78 sws_ prefix, more seperation between internal & external swscaler API
michael
parents: 9476
diff changeset
453 return srcSliceH;
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
454 }
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
455
9499
bc5b87370cd1 cleanup
michael
parents: 9494
diff changeset
456 static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
457 int srcSliceH, uint8_t* dst[], int dstStride[]){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
458 int y, h_size;
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
459
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
460 if(c->srcFormat == IMGFMT_422P){
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
461 srcStride[1] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
462 srcStride[2] *= 2;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
463 }
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
464
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
465 h_size= (c->dstW+7)&~7;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
466 if(h_size*4 > dstStride[0]) h_size-=8;
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
467
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
468 __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
469
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
470 for (y= 0; y<srcSliceH; y++ ) {
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
471 uint8_t *_image = dst[0] + (y+srcSliceY)*dstStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
472 uint8_t *_py = src[0] + y*srcStride[0];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
473 uint8_t *_pu = src[1] + (y>>1)*srcStride[1];
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
474 uint8_t *_pv = src[2] + (y>>1)*srcStride[2];
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
475 long index= -h_size/2;
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
476
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
477 /* this mmx assembly code deals with a SINGLE scan line at a time; it converts 8
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
478 pixels in each iteration */
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
479 __asm__ __volatile__ (
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
480 /* load data for start of next scan line */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
481 "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
482 "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
483 "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
484 // ".balign 16 \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
485 "1: \n\t"
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
486 YUV2RGB
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
487 /* convert RGB plane to RGB packed format,
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
488 mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
489 mm4 -> GB, mm5 -> AR pixel 4-7,
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
490 mm6 -> GB, mm7 -> AR pixel 0-3 */
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
491 "pxor %%mm3, %%mm3;" /* zero mm3 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
492
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
493 "movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
494 "movq %%mm1, %%mm7;" /* R7 R6 R5 R4 R3 R2 R1 R0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
495
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
496 "movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
497 "movq %%mm1, %%mm5;" /* R7 R6 R5 R4 R3 R2 R1 R0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
498
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
499 "punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
500 "punpcklbw %%mm3, %%mm7;" /* 00 R3 00 R2 00 R1 00 R0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
501
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
502 "punpcklwd %%mm7, %%mm6;" /* 00 R1 G1 B1 00 R0 G0 B0 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
503 MOVNTQ " %%mm6, (%1);" /* Store ARGB1 ARGB0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
504
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
505 "movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
506 "punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
507
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
508 "punpckhwd %%mm7, %%mm6;" /* 00 R3 G3 B3 00 R2 G2 B2 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
509 MOVNTQ " %%mm6, 8 (%1);" /* Store ARGB3 ARGB2 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
510
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
511 "punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
512 "punpckhbw %%mm3, %%mm5;" /* 00 R7 00 R6 00 R5 00 R4 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
513
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
514 "punpcklwd %%mm5, %%mm4;" /* 00 R5 G5 B5 00 R4 G4 B4 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
515 MOVNTQ " %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
516
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
517 "movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
518 "punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
519
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
520 "punpckhwd %%mm5, %%mm4;" /* 00 R7 G7 B7 00 R6 G6 B6 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
521 MOVNTQ " %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
522
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
523 "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
524 "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
525
2734
4ce165aa0135 common code moved to a #define
michael
parents: 2732
diff changeset
526 "pxor %%mm4, %%mm4;" /* zero mm4 */
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
527 "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
528
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
529 "add $32, %1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 9499
diff changeset
530 "add $4, %0 \n\t"
9476
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
531 " js 1b \n\t"
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
532
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
533 : "+r" (index), "+r" (_image)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
534 : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
eff727517e6b yuv2rgb brightness/contrast/saturation/different colorspaces support finished
michael
parents: 9392
diff changeset
535 );
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
536 }
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
537
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
538 __asm__ __volatile__ (EMMS);
9494
543ab3909b78 sws_ prefix, more seperation between internal & external swscaler API
michael
parents: 9476
diff changeset
539 return srcSliceH;
2732
ae79207a3055 Move yuv2rgb to postprocess
nick
parents:
diff changeset
540 }