annotate libswscale/swscale_template.c @ 19619:a83e5b8d2e63

Patch from Karolina Lindqvist <karolina.lindqvist@kramnet.se> "There is a bug in the zoran -vo zr driver, that makes the output garbled always. It also probably affects the zrmjpeg filter. This patch takes care of the problem." Patch tested and OK. And 10l to me, because this bug probably has existed for a looong time.
author rik
date Fri, 01 Sep 2006 18:49:40 +0000
parents 4678e9f81334
children 8e50cba9fe03
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1 /*
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2 Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
4 This program is free software; you can redistribute it and/or modify
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
5 it under the terms of the GNU General Public License as published by
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
6 the Free Software Foundation; either version 2 of the License, or
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
7 (at your option) any later version.
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
8
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
9 This program is distributed in the hope that it will be useful,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
12 GNU General Public License for more details.
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
13
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
14 You should have received a copy of the GNU General Public License
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
15 along with this program; if not, write to the Free Software
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19594
4678e9f81334 make the C code of the swscaler which i wrote LGPL
michael
parents: 19400
diff changeset
17
4678e9f81334 make the C code of the swscaler which i wrote LGPL
michael
parents: 19400
diff changeset
18 the C code (not assembly, mmx, ...) of the swscaler which has been written
4678e9f81334 make the C code of the swscaler which i wrote LGPL
michael
parents: 19400
diff changeset
19 by Michael Niedermayer can be used under the LGPL license too
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
20 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
21
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
22 #undef REAL_MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
23 #undef MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
24 #undef PAVGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
25 #undef PREFETCH
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
26 #undef PREFETCHW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
27 #undef EMMS
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
28 #undef SFENCE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
29
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
30 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
31 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
32 #define EMMS "femms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
33 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
34 #define EMMS "emms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
35 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
36
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
37 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
38 #define PREFETCH "prefetch"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
39 #define PREFETCHW "prefetchw"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
40 #elif defined ( HAVE_MMX2 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
41 #define PREFETCH "prefetchnta"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
42 #define PREFETCHW "prefetcht0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
43 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
44 #define PREFETCH "/nop"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
45 #define PREFETCHW "/nop"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
46 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
47
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
48 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
49 #define SFENCE "sfence"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
50 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
51 #define SFENCE "/nop"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
52 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
53
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
54 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
55 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
56 #elif defined (HAVE_3DNOW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
57 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
58 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
59
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
60 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
61 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
62 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
63 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
64 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
65 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
66
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
67 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
68 #include "swscale_altivec_template.c"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
69 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
70
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
71 #define YSCALEYUV2YV12X(x, offset, dest, width) \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
72 asm volatile(\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
73 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
74 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
75 "movq %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
76 "lea " offset "(%0), %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
77 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
78 ASMALIGN(4) /* FIXME Unroll? */\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
79 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
80 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
81 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
82 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
83 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
84 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
85 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
86 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
87 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
88 "paddw %%mm2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
89 "paddw %%mm5, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
90 " jnz 1b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
91 "psraw $3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
92 "psraw $3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
93 "packuswb %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
94 MOVNTQ(%%mm3, (%1, %%REGa))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
95 "add $8, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
96 "cmp %2, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
97 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
98 "movq %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
99 "lea " offset "(%0), %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
100 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
101 "jb 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
102 :: "r" (&c->redDither),\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
103 "r" (dest), "p" (width)\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
104 : "%"REG_a, "%"REG_d, "%"REG_S\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
105 );
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
106
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
107 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
108 asm volatile(\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
109 "lea " offset "(%0), %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
110 "xor %%"REG_a", %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
111 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
112 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
113 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
114 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
115 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
116 ASMALIGN(4) \
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
117 "1: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
118 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
119 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
120 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
121 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
122 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
123 "punpcklwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
124 "punpckhwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
125 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
126 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
127 "pmaddwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
128 "paddd %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
129 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
130 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
131 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
132 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
133 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
134 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
135 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
136 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
137 "pmaddwd %%mm1, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
138 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
139 "paddd %%mm2, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
140 "paddd %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
141 " jnz 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
142 "psrad $16, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
143 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
144 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
145 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
146 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
147 "packssdw %%mm5, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
148 "packssdw %%mm7, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
149 "paddw %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
150 "paddw %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
151 "psraw $3, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
152 "psraw $3, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
153 "packuswb %%mm6, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
154 MOVNTQ(%%mm4, (%1, %%REGa))\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
155 "add $8, %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
156 "cmp %2, %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
157 "lea " offset "(%0), %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
158 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
159 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
160 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
161 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
162 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
163 "jb 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
164 :: "r" (&c->redDither),\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
165 "r" (dest), "p" (width)\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
166 : "%"REG_a, "%"REG_d, "%"REG_S\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
167 );
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
168
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
169 #define YSCALEYUV2YV121 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
170 "mov %2, %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
171 ASMALIGN(4) /* FIXME Unroll? */\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
172 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
173 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
174 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
175 "psraw $7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
176 "psraw $7, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
177 "packuswb %%mm1, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
178 MOVNTQ(%%mm0, (%1, %%REGa))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
179 "add $8, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
180 "jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
181
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
182 /*
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
183 :: "m" (-lumFilterSize), "m" (-chrFilterSize),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
184 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
185 "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
186 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
187 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
188 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
189 #define YSCALEYUV2PACKEDX \
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
190 asm volatile(\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
191 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
192 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
193 "nop \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
194 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
195 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
196 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
197 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
198 "movq %%mm3, %%mm4 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
199 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
200 "2: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
201 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
202 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
203 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
204 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
205 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
206 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
207 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
208 "paddw %%mm2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
209 "paddw %%mm5, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
210 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
211 " jnz 2b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
212 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
213 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
214 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
215 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
216 "movq %%mm1, %%mm7 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
217 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
218 "2: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
219 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
220 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
221 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
222 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
223 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
224 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
225 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
226 "paddw %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
227 "paddw %%mm5, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
228 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
229 " jnz 2b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
230
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
231 #define YSCALEYUV2PACKEDX_END\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
232 :: "r" (&c->redDither), \
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
233 "m" (dummy), "m" (dummy), "m" (dummy),\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
234 "r" (dest), "m" (dstW)\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
235 : "%"REG_a, "%"REG_d, "%"REG_S\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
236 );
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
237
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/*
 * YSCALEYUV2PACKEDX_ACCURATE: opens an "asm volatile(" statement (it is not
 * closed here, the code following the macro has to finish it) that vertically
 * filters chroma and luma words using 32-bit accumulation.
 *
 * Chroma pass: REG_d walks the table at CHR_MMX_FILTER_OFFSET(%0) in 16-byte
 * steps. Each step uses two source pointers (offsets 0 and 4) and one quad
 * word of coefficients (offset 8); the words of both sources are interleaved
 * and the pmaddwd products are accumulated as dwords, U in mm4/mm5 and V
 * (read 4096 bytes after U) in mm6/mm7. The walk ends when the next pointer
 * is NULL. The sums are shifted right by 16, packed to words with signed
 * saturation, the quad word at VROUNDER_OFFSET(%0) is added, and the results
 * are stored at U_TEMP(%0) and V_TEMP(%0).
 *
 * Luma pass: same scheme on the table at LUM_MMX_FILTER_OFFSET(%0), with
 * REG_a scaled by 2 and two quad words per source (offsets 0 and 8).
 *
 * On exit: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (both reloaded from the
 * temporaries). REG_a is zeroed at entry and not advanced inside the macro.
 *
 * Label "1:" is not branched to inside this macro; presumably the code
 * appended after the macro loops back to it.
 * The "test" ending each inner loop is issued several instructions before its
 * "jnz"; only MMX instructions, which do not modify the flags, sit in between.
 * NOTE(review): the second source pointer is read at a fixed offset of 4,
 * which looks like it assumes 4-byte pointers -- confirm before relying on
 * this on x86_64.
 */
238 #define YSCALEYUV2PACKEDX_ACCURATE \
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
239 asm volatile(\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
240 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
241 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
242 "nop \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
243 "1: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
244 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
245 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
246 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
247 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
248 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
249 "pxor %%mm7, %%mm7 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
250 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
251 "2: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
252 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
253 "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
254 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
255 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
256 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
257 "punpcklwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
258 "punpckhwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
259 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
260 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
261 "pmaddwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
262 "paddd %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
263 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
264 "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
265 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
266 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
267 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
268 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
269 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
270 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
271 "pmaddwd %%mm1, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
272 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
273 "paddd %%mm2, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
274 "paddd %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
275 " jnz 2b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
276 "psrad $16, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
277 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
278 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
279 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
280 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
281 "packssdw %%mm5, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
282 "packssdw %%mm7, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
283 "paddw %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
284 "paddw %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
285 "movq %%mm4, "U_TEMP"(%0) \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
286 "movq %%mm6, "V_TEMP"(%0) \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
287 \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
288 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
289 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
290 "pxor %%mm1, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
291 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
292 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
293 "pxor %%mm6, %%mm6 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
294 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
295 "2: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
296 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
297 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
298 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
299 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
300 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
301 "punpcklwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
302 "punpckhwd %%mm4, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
303 "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
304 "pmaddwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
305 "pmaddwd %%mm4, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
306 "paddd %%mm0, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
307 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
308 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
309 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
310 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
311 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
312 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
313 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
314 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
315 "pmaddwd %%mm4, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
316 "pmaddwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
317 "paddd %%mm2, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
318 "paddd %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
319 " jnz 2b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
320 "psrad $16, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
321 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
322 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
323 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
324 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
325 "packssdw %%mm5, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
326 "packssdw %%mm6, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
327 "paddw %%mm0, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
328 "paddw %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
329 "movq "U_TEMP"(%0), %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
330 "movq "V_TEMP"(%0), %%mm4 \n\t"\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
331
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/*
 * YSCALEYUV2RGBX: asm fragment that turns filtered samples into packed 8-bit
 * colour components. It expects mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V and %0 as
 * the base register for the U_OFFSET, V_OFFSET, Y_OFFSET and coefficient
 * constants.
 * The offsets are subtracted, the results are scaled with pmulhw, every chroma
 * term is duplicated word-wise (low half combined with Y1, high half with Y2),
 * and the sums are packed to bytes with unsigned saturation.
 * On exit: mm2 = B, mm4 = G, mm5 = R (8 bytes each) and mm7 = 0.
 */
332 #define YSCALEYUV2RGBX \
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
333 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
334 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
335 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
336 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
337 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
338 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
339 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
340 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
341 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
342 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
343 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
344 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
345 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
346 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
347 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
348 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
349 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
350 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
351 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
352 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
353 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
354 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
355 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
356 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
357 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
358 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
359 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
360 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
361 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
362 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
363 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
364 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
365 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
366 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
367 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * FULL_YSCALEYUV2RGB is compiled out by the surrounding "#if 0".
 * The fragment blends two lines (buf0/buf1 through %0/%1, uvbuf0/uvbuf1
 * through %2/%3) with the weights taken from %6 and %7, then converts the
 * result using the global constants w80, w400, yCoeff, ubCoeff, ugCoeff,
 * vrCoeff and vgCoeff.
 * Result registers: mm3 = B, mm0 = R, mm1 = G, each packed to bytes.
 * Label "1:" is defined but not branched to inside the fragment.
 */
368 #if 0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
369 #define FULL_YSCALEYUV2RGB \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
370 "pxor %%mm7, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
371 "movd %6, %%mm6 \n\t" /*yalpha1*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
372 "punpcklwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
373 "punpcklwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
374 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
375 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
376 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
377 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
378 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
379 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
380 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
381 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
382 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
383 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
384 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
385 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
386 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
387 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
388 "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
389 "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
390 "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
391 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
392 "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
393 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
394 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
395 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
396 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
397 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
398 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
399 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
400 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
401 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
402 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
403 "psraw $4, %%mm0 \n\t" /* uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
404 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
405 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
406 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
407 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
408 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
409 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
410 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
411 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
412 "paddw %%mm1, %%mm3 \n\t" /* B*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
413 "paddw %%mm1, %%mm0 \n\t" /* R*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
414 "packuswb %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
415 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
416 "packuswb %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
417 "paddw %%mm4, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
418 "paddw %%mm2, %%mm1 \n\t" /* G*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
419 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
420 "packuswb %%mm1, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
421 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
422
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * REAL_YSCALEYUV2PACKED(index, c): asm fragment that blends two input lines
 * (buf0/buf1 through %0/%1, uvbuf0/uvbuf1 through %2/%3).
 * index and c are pasted into the asm text with #, so they must be the
 * literal operand strings for the index register and the base register.
 *
 * Setup, before label "1:": the quad words at CHR_MMX_FILTER_OFFSET+8 and
 * LUM_MMX_FILTER_OFFSET+8 relative to c are shifted right by 3 and written
 * back in place (the memory behind c is modified), then index is zeroed.
 *
 * After label "1:": each output is (line1 >> 7) + pmulhw(line0 - line1, coeff),
 * leaving mm3 = U, mm4 = V (read 4096 bytes after U), mm1 = first luma quad
 * word and mm7 = second luma quad word.
 * Label "1:" is not branched to inside this macro.
 */
423 #define REAL_YSCALEYUV2PACKED(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
424 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
425 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
426 "psraw $3, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
427 "psraw $3, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
428 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
429 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
430 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
431 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
432 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
433 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
434 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
435 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
436 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
437 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
438 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
439 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
440 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
441 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
442 "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
443 "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
444 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
445 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
446 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
447 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
448 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
449 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
450 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
451 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
452 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
453 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
454 "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
455 "psraw $7, %%mm7 \n\t" /* buf1[eax] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
456 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
457 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
458
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Wrapper around REAL_YSCALEYUV2PACKED: going through this extra macro level
 * lets index and c be macro-expanded before the REAL_ variant stringifies
 * them with #.
 */
459 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
460
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
461 #define REAL_YSCALEYUV2RGB(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
462 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
463 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
464 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
465 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
466 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
467 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
468 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
469 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
470 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
471 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
472 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
473 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
474 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
475 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
476 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
477 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
478 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
479 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
480 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
481 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
482 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
483 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
484 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
485 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
486 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
487 "movq 8(%0, "#index", 2), %%mm6\n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
488 "movq 8(%1, "#index", 2), %%mm7\n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
489 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
490 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
491 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
492 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
493 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
494 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
495 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
496 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
497 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
498 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
499 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
500 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
501 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
502 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
503 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
504 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
505 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
506 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
507 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
508 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
509 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
510 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
511 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
512 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
513 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
514 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
515 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
516 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
517 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
518 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
519 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
520 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
521 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
522 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
523 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
524 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection layer: arguments passed here are macro-expanded first, so that
 * REAL_YSCALEYUV2RGB stringifies (via '#') the expanded register/operand text. */
525 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
526
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop prologue that loads one batch of samples from asm operand %0
 * (annotated below as buf0) and asm operand %2 (annotated as uvbuf0) and
 * arithmetic-shifts every 16-bit word right by 7.
 *
 * On exit:
 *   mm3 = words at (%2 + index)        >> 7
 *   mm4 = words at (%2 + index + 4096) >> 7
 *   mm1 = words at (%0 + 2*index)      >> 7
 *   mm7 = words at (%0 + 2*index + 8)  >> 7
 *
 * The macro zeroes index and opens local label "1:" but does not close the
 * loop; the code appended after it must provide the backward branch (the
 * WRITEBGR* macros in this file end with "jb 1b").
 * The c argument is not referenced by this macro body.
 */
527 #define REAL_YSCALEYUV2PACKED1(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
528 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
529 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
530 "1: \n\t" /* loop head, branched to by a later "jb 1b" */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
531 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
532 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
533 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
534 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
535 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
536 "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next four words of buf0 (byte offset +8) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
537 "psraw $7, %%mm1 \n\t" /* first four buf0 words >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
538 "psraw $7, %%mm7 \n\t" /* next four buf0 words >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
539
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
540 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
541
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop prologue that converts one batch of 8 pixels to packed B, G and R
 * bytes, reading a single luma line (asm operand %0, annotated buf0) and a
 * single chroma line (asm operand %2, annotated uvbuf0). No second line is
 * read, so no vertical blending happens here.
 *
 * Offsets and coefficients are fetched relative to c through the
 * U_OFFSET, V_OFFSET, Y_OFFSET, UG_COEFF, VG_COEFF, UB_COEFF, VR_COEFF and
 * Y_COEFF strings (defined outside this excerpt).
 *
 * Each chroma term is duplicated (punpck?wd reg,reg) so that one chroma
 * sample is applied to two adjacent luma samples.
 *
 * On exit: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * mm0, mm1, mm3 and mm6 are overwritten as scratch.
 *
 * The macro zeroes index and opens local label "1:" but does not close the
 * loop; the code appended after it must provide the backward branch (the
 * WRITEBGR* macros in this file end with "jb 1b").
 */
542 #define REAL_YSCALEYUV2RGB1(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
543 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
544 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
545 "1: \n\t" /* loop head, branched to by a later "jb 1b" */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
546 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
547 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
548 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
549 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
550 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
551 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
552 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
553 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
554 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t" /* mm3 = ug (high 16 bits of the product) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
555 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t" /* mm4 = vg (high 16 bits of the product) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
556 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
557 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
558 "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next four words of buf0 (byte offset +8) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
559 "psraw $4, %%mm1 \n\t" /* first four buf0 words >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
560 "psraw $4, %%mm7 \n\t" /* next four buf0 words >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
561 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t" /* mm2 = ub */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
562 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t" /* mm5 = vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
563 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
564 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
565 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t" /* mm1 = Y1 (first four luma samples) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
566 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t" /* mm7 = Y2 (next four luma samples) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
567 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
568 "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
569 "movq %%mm2, %%mm0 \n\t" /* copy of ub for the upper two chroma samples */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
570 "movq %%mm5, %%mm6 \n\t" /* copy of vr for the upper two chroma samples */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
571 "movq %%mm4, %%mm3 \n\t" /* copy of ug+vg for the upper two chroma samples */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
572 "punpcklwd %%mm2, %%mm2 \n\t" /* duplicate the two low ub words: w0 w0 w1 w1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
573 "punpcklwd %%mm5, %%mm5 \n\t" /* same for vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
574 "punpcklwd %%mm4, %%mm4 \n\t" /* same for ug+vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
575 "paddw %%mm1, %%mm2 \n\t" /* B1 = Y1 + ub */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
576 "paddw %%mm1, %%mm5 \n\t" /* R1 = Y1 + vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
577 "paddw %%mm1, %%mm4 \n\t" /* G1 = Y1 + ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
578 "punpckhwd %%mm0, %%mm0 \n\t" /* duplicate the two high ub words: w2 w2 w3 w3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
579 "punpckhwd %%mm6, %%mm6 \n\t" /* same for vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
580 "punpckhwd %%mm3, %%mm3 \n\t" /* same for ug+vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
581 "paddw %%mm7, %%mm0 \n\t" /* B2 = Y2 + ub */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
582 "paddw %%mm7, %%mm6 \n\t" /* R2 = Y2 + vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
583 "paddw %%mm7, %%mm3 \n\t" /* G2 = Y2 + ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
584 /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = first four pixels, 2 = next four) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
585 "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes, unsigned-saturated */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
586 "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes, unsigned-saturated */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
587 "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes, unsigned-saturated */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
588 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
589 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
590
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop prologue like REAL_YSCALEYUV2PACKED1, except that chroma is taken
 * from two lines: the words at asm operands %2 and %3 (annotated uvbuf0 and
 * uvbuf1) are added and the sum is shifted right by 8, i.e. one bit more
 * than the single-line >>7, which halves the sum.
 * NOTE(review): the chroma shift here is logical (psrlw) whereas
 * REAL_YSCALEYUV2PACKED1 uses an arithmetic shift (psraw) - confirm the
 * samples can never be negative at this point.
 *
 * On exit:
 *   mm3 = (words at %2+index      + words at %3+index)      >> 8
 *   mm4 = (words at %2+index+4096 + words at %3+index+4096) >> 8
 *   mm1 = words at (%0 + 2*index)     >> 7
 *   mm7 = words at (%0 + 2*index + 8) >> 7
 * mm2 and mm5 are overwritten as scratch.
 *
 * The macro zeroes index and opens local label "1:" but does not close the
 * loop; the code appended after it must provide the backward branch.
 * The c argument is not referenced by this macro body.
 */
591 #define REAL_YSCALEYUV2PACKED1b(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
592 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
593 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
594 "1: \n\t" /* loop head, branched to by a later "jb 1b" */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
595 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
596 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
597 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
598 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
599 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
600 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
601 "psrlw $8, %%mm3 \n\t" /* (uvbuf0[eax] + uvbuf1[eax]) >>8, logical shift */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
602 "psrlw $8, %%mm4 \n\t" /* (uvbuf0[eax+2048] + uvbuf1[eax+2048]) >>8, logical shift */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
603 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
604 "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next four words of buf0 (byte offset +8) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
605 "psraw $7, %%mm1 \n\t" /* first four buf0 words >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
606 "psraw $7, %%mm7 \n\t" /* next four buf0 words >>7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
607 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
608
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
609 // do vertical chrominance interpolation
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop prologue that converts one batch of 8 pixels to packed B, G and R
 * bytes. Luma comes from a single line (asm operand %0, annotated buf0);
 * chroma is the sum of two lines (asm operands %2 and %3, annotated uvbuf0
 * and uvbuf1) shifted right by 5, i.e. one bit more than a single-line >>4,
 * which halves the sum. The shift is logical (psrlw); see the FIXME on those
 * lines about overflow of the 16-bit sum.
 *
 * Offsets and coefficients are fetched relative to c through the
 * U_OFFSET, V_OFFSET, Y_OFFSET, UG_COEFF, VG_COEFF, UB_COEFF, VR_COEFF and
 * Y_COEFF strings (defined outside this excerpt).
 *
 * Each chroma term is duplicated (punpck?wd reg,reg) so that one chroma
 * sample is applied to two adjacent luma samples.
 *
 * On exit: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * mm0, mm1, mm3 and mm6 are overwritten as scratch.
 *
 * The macro zeroes index and opens local label "1:" but does not close the
 * loop; the code appended after it must provide the backward branch (the
 * WRITEBGR* macros in this file end with "jb 1b").
 */
610 #define REAL_YSCALEYUV2RGB1b(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
611 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
612 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
613 "1: \n\t" /* loop head, branched to by a later "jb 1b" */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
614 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
615 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
616 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
617 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
618 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
619 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
620 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
621 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
622 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
623 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
624 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
625 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
626 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t" /* mm3 = ug (high 16 bits of the product) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
627 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t" /* mm4 = vg (high 16 bits of the product) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
628 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
629 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
630 "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next four words of buf0 (byte offset +8) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
631 "psraw $4, %%mm1 \n\t" /* first four buf0 words >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
632 "psraw $4, %%mm7 \n\t" /* next four buf0 words >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
633 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t" /* mm2 = ub */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
634 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t" /* mm5 = vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
635 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
636 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
637 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t" /* mm1 = Y1 (first four luma samples) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
638 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t" /* mm7 = Y2 (next four luma samples) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
639 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
640 "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
641 "movq %%mm2, %%mm0 \n\t" /* copy of ub for the upper two chroma samples */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
642 "movq %%mm5, %%mm6 \n\t" /* copy of vr for the upper two chroma samples */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
643 "movq %%mm4, %%mm3 \n\t" /* copy of ug+vg for the upper two chroma samples */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
644 "punpcklwd %%mm2, %%mm2 \n\t" /* duplicate the two low ub words: w0 w0 w1 w1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
645 "punpcklwd %%mm5, %%mm5 \n\t" /* same for vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
646 "punpcklwd %%mm4, %%mm4 \n\t" /* same for ug+vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
647 "paddw %%mm1, %%mm2 \n\t" /* B1 = Y1 + ub */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
648 "paddw %%mm1, %%mm5 \n\t" /* R1 = Y1 + vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
649 "paddw %%mm1, %%mm4 \n\t" /* G1 = Y1 + ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
650 "punpckhwd %%mm0, %%mm0 \n\t" /* duplicate the two high ub words: w2 w2 w3 w3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
651 "punpckhwd %%mm6, %%mm6 \n\t" /* same for vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
652 "punpckhwd %%mm3, %%mm3 \n\t" /* same for ug+vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
653 "paddw %%mm7, %%mm0 \n\t" /* B2 = Y2 + ub */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
654 "paddw %%mm7, %%mm6 \n\t" /* R2 = Y2 + vr */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
655 "paddw %%mm7, %%mm3 \n\t" /* G2 = Y2 + ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
656 /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = first four pixels, 2 = next four) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
657 "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes, unsigned-saturated */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
658 "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes, unsigned-saturated */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
659 "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes, unsigned-saturated */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
660 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
661 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
662
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop epilogue that interleaves 8 B, 8 G and 8 R bytes into eight 32-bit
 * pixels, stores them and closes the loop opened at label "1:".
 *
 * Input registers: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 * The byte patterns in the comments below are written most-significant byte
 * first, so "0RGB" is stored in memory as B, G, R, 0.
 *
 * dst   - base register of the output line; pixels go to dst + index*4
 * dstw  - loop bound compared against index
 * index - pixel counter register; advanced by 8 per iteration
 *
 * Four quadwords (32 bytes) are written per iteration through MOVNTQ
 * (defined outside this excerpt). The loop repeats while index < dstw
 * (unsigned compare, "jb"). mm0, mm1, mm3 and mm6 are used as scratch and
 * mm2/mm5 are overwritten.
 */
663 #define REAL_WRITEBGR32(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
664 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
665 "movq %%mm2, %%mm1 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
666 "movq %%mm5, %%mm6 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
667 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
668 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
669 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
670 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
671 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
672 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
673 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
674 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
675 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
676 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
677 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
678 MOVNTQ(%%mm0, (dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
679 MOVNTQ(%%mm2, 8(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
680 MOVNTQ(%%mm1, 16(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
681 MOVNTQ(%%mm3, 24(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
682 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
683 "add $8, "#index" \n\t" /* 8 pixels done */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
684 "cmp "#dstw", "#index" \n\t" /* compare index against dstw */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
685 " jb 1b \n\t" /* loop while index < dstw (unsigned) */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
686 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
687
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop epilogue that packs 8 B, 8 G and 8 R bytes into eight 16-bit pixels,
 * stores them and closes the loop opened at label "1:".
 *
 * Input registers: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 * bF8 and bFC are constants defined outside this excerpt; presumably they
 * hold 0xF8 / 0xFC in every byte (TODO confirm). Under that assumption each
 * output word is RRRRRGGG GGGBBBBB: R keeps 5 bits in the high byte, G keeps
 * 6 bits shifted left by 3, and B keeps 5 bits shifted right by 3.
 *
 * dst   - base register of the output line; pixels go to dst + index*2
 * dstw  - loop bound compared against index
 * index - pixel counter register; advanced by 8 per iteration
 *
 * Two quadwords (16 bytes) are written per iteration through MOVNTQ
 * (defined outside this excerpt). The loop repeats while index < dstw
 * (unsigned compare, "jb"). mm1 and mm3 are used as scratch and
 * mm2/mm4/mm5 are overwritten.
 */
688 #define REAL_WRITEBGR16(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
689 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
690 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
691 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
692 "psrlq $3, %%mm2 \n\t" /* B >>3; 64-bit shift, relies on the mask having cleared each byte's low bits */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
693 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
694 "movq %%mm2, %%mm1 \n\t" /* copy of B for pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
695 "movq %%mm4, %%mm3 \n\t" /* copy of G for pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
696 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
697 "punpcklbw %%mm7, %%mm3 \n\t" /* G of pixels 0..3 widened to words */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
698 "punpcklbw %%mm5, %%mm2 \n\t" /* pixels 0..3: low byte B, high byte R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
699 "punpckhbw %%mm7, %%mm4 \n\t" /* G of pixels 4..7 widened to words */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
700 "punpckhbw %%mm5, %%mm1 \n\t" /* pixels 4..7: low byte B, high byte R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
701 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
702 "psllq $3, %%mm3 \n\t" /* G <<3, pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
703 "psllq $3, %%mm4 \n\t" /* G <<3, pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
704 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
705 "por %%mm3, %%mm2 \n\t" /* merge G into pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
706 "por %%mm4, %%mm1 \n\t" /* merge G into pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
707 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
708 MOVNTQ(%%mm2, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
709 MOVNTQ(%%mm1, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
710 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
711 "add $8, "#index" \n\t" /* 8 pixels done */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
712 "cmp "#dstw", "#index" \n\t" /* compare index against dstw */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
713 " jb 1b \n\t" /* loop while index < dstw (unsigned) */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
714 #define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
715
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Loop epilogue that packs 8 B, 8 G and 8 R bytes into eight 16-bit pixels
 * with 5 bits per component, stores them and closes the loop opened at
 * label "1:".
 *
 * Input registers: mm2 = B, mm4 = G, mm5 = R (8 bytes each), mm7 = 0.
 * bF8 is a constant defined outside this excerpt; presumably it holds 0xF8
 * in every byte (TODO confirm). Under that assumption each output word is
 * 0RRRRRGG GGGBBBBB: R is shifted right by 1 within the high byte, G is
 * shifted left by 2, and B is shifted right by 3.
 *
 * dst   - base register of the output line; pixels go to dst + index*2
 * dstw  - loop bound compared against index
 * index - pixel counter register; advanced by 8 per iteration
 *
 * Two quadwords (16 bytes) are written per iteration through MOVNTQ
 * (defined outside this excerpt). The loop repeats while index < dstw
 * (unsigned compare, "jb"). mm1 and mm3 are used as scratch and
 * mm2/mm4/mm5 are overwritten.
 */
716 #define REAL_WRITEBGR15(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
717 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
718 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
719 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
720 "psrlq $3, %%mm2 \n\t" /* B >>3; 64-bit shift, relies on the mask having cleared each byte's low bits */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
721 "psrlq $1, %%mm5 \n\t" /* R >>1; same reliance on the mask */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
722 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
723 "movq %%mm2, %%mm1 \n\t" /* copy of B for pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
724 "movq %%mm4, %%mm3 \n\t" /* copy of G for pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
725 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
726 "punpcklbw %%mm7, %%mm3 \n\t" /* G of pixels 0..3 widened to words */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
727 "punpcklbw %%mm5, %%mm2 \n\t" /* pixels 0..3: low byte B, high byte R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
728 "punpckhbw %%mm7, %%mm4 \n\t" /* G of pixels 4..7 widened to words */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
729 "punpckhbw %%mm5, %%mm1 \n\t" /* pixels 4..7: low byte B, high byte R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
730 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
731 "psllq $2, %%mm3 \n\t" /* G <<2, pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
732 "psllq $2, %%mm4 \n\t" /* G <<2, pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
733 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
734 "por %%mm3, %%mm2 \n\t" /* merge G into pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
735 "por %%mm4, %%mm1 \n\t" /* merge G into pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
736 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
737 MOVNTQ(%%mm2, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
738 MOVNTQ(%%mm1, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
739 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
740 "add $8, "#index" \n\t" /* 8 pixels done */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
741 "cmp "#dstw", "#index" \n\t" /* compare index against dstw */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
742 " jb 1b \n\t" /* loop while index < dstw (unsigned) */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Indirection so that macro arguments are expanded before being stringified by '#'. */
743 #define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
744
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24OLD - older plain-MMX store of 8 packed 24-bit pixels.
 *
 * Expands to a sequence of asm string literals meant to be pasted inside an
 * enclosing asm statement.  On entry (per the register note below):
 *   %%mm2 = 8 blue bytes, %%mm4 = 8 green bytes, %%mm5 = 8 red bytes,
 *   %%mm7 = 0.
 * The three channels are interleaved into four "0RGB0RGB" quadwords, the
 * padding bytes are squeezed out with shifts and the bm00000111 / bm11111000 /
 * bm00001111 masks, and the resulting 24 bytes are stored with MOVNTQ at
 * dst, 8(dst) and 16(dst).
 *
 * dst   - register operand holding the output pointer; advanced by 24.
 * dstw  - operand compared against index to terminate the loop.
 * index - register operand holding the pixel counter; advanced by 8.
 *
 * The final "jb 1b" jumps back to a local label "1:" that must be provided by
 * the code preceding this macro inside the same asm statement.
 * Clobbers %%mm0-%%mm6.
 */
#define WRITEBGR24OLD(dst, dstw, index) \
		/* %%mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1 \n\t"		/* B */\
		"movq %%mm5, %%mm6 \n\t"		/* R */\
		"punpcklbw %%mm4, %%mm2 \n\t"		/* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5 \n\t"		/* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1 \n\t"		/* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6 \n\t"		/* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0 \n\t"		/* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3 \n\t"		/* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0 \n\t"		/* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2 \n\t"		/* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1 \n\t"		/* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3 \n\t"		/* 0RGB0RGB 3 */\
\
		"movq %%mm0, %%mm4 \n\t"		/* 0RGB0RGB 0 */\
		"psrlq $8, %%mm0 \n\t"			/* 00RGB0RG 0 */\
		"pand "MANGLE(bm00000111)", %%mm4\n\t"	/* 00000RGB 0 */\
		"pand "MANGLE(bm11111000)", %%mm0\n\t"	/* 00RGB000 0.5 */\
		"por %%mm4, %%mm0 \n\t"			/* 00RGBRGB 0 */\
		"movq %%mm2, %%mm4 \n\t"		/* 0RGB0RGB 1 */\
		"psllq $48, %%mm2 \n\t"			/* GB000000 1 */\
		"por %%mm2, %%mm0 \n\t"			/* GBRGBRGB 0 */\
\
		"movq %%mm4, %%mm2 \n\t"		/* 0RGB0RGB 1 */\
		"psrld $16, %%mm4 \n\t"			/* 000R000R 1 */\
		"psrlq $24, %%mm2 \n\t"			/* 0000RGB0 1.5 */\
		"por %%mm4, %%mm2 \n\t"			/* 000RRGBR 1 */\
		"pand "MANGLE(bm00001111)", %%mm2\n\t"	/* 0000RGBR 1 */\
		"movq %%mm1, %%mm4 \n\t"		/* 0RGB0RGB 2 */\
		"psrlq $8, %%mm1 \n\t"			/* 00RGB0RG 2 */\
		"pand "MANGLE(bm00000111)", %%mm4\n\t"	/* 00000RGB 2 */\
		"pand "MANGLE(bm11111000)", %%mm1\n\t"	/* 00RGB000 2.5 */\
		"por %%mm4, %%mm1 \n\t"			/* 00RGBRGB 2 */\
		"movq %%mm1, %%mm4 \n\t"		/* 00RGBRGB 2 */\
		"psllq $32, %%mm1 \n\t"			/* BRGB0000 2 */\
		"por %%mm1, %%mm2 \n\t"			/* BRGBRGBR 1 */\
\
		"psrlq $32, %%mm4 \n\t"			/* 000000RG 2.5 */\
		"movq %%mm3, %%mm5 \n\t"		/* 0RGB0RGB 3 */\
		"psrlq $8, %%mm3 \n\t"			/* 00RGB0RG 3 */\
		"pand "MANGLE(bm00000111)", %%mm5\n\t"	/* 00000RGB 3 */\
		"pand "MANGLE(bm11111000)", %%mm3\n\t"	/* 00RGB000 3.5 */\
		"por %%mm5, %%mm3 \n\t"			/* 00RGBRGB 3 */\
		"psllq $16, %%mm3 \n\t"			/* RGBRGB00 3 */\
		"por %%mm4, %%mm3 \n\t"			/* RGBRGBRG 2.5 */\
\
		/* 3 quadwords = 24 bytes = 8 pixels */\
		MOVNTQ(%%mm0, (dst))\
		MOVNTQ(%%mm2, 8(dst))\
		MOVNTQ(%%mm3, 16(dst))\
		"add $24, "#dst" \n\t"\
\
		"add $8, "#index" \n\t"\
		"cmp "#dstw", "#index" \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR24MMX - plain-MMX store of 8 packed 24-bit pixels.
 *
 * Expands to asm string literals for use inside an enclosing asm statement.
 * On entry (per the register note below):
 *   %%mm2 = 8 blue bytes, %%mm4 = 8 green bytes, %%mm5 = 8 red bytes,
 *   %%mm7 = 0.
 * The channels are interleaved into four "0RGB0RGB" quadwords; each is then
 * compacted to "0RGBRGB0" with a shift + punpckhdq (no mask constants are
 * needed, unlike WRITEBGR24OLD) and the pieces are merged into three
 * quadwords stored with MOVNTQ at dst, 8(dst) and 16(dst).
 *
 * dst   - register operand holding the output pointer; advanced by 24.
 * dstw  - operand compared against index to terminate the loop.
 * index - register operand holding the pixel counter; advanced by 8.
 *
 * The final "jb 1b" jumps back to a local label "1:" supplied by the code
 * preceding this macro inside the same asm statement.
 * Clobbers %%mm0-%%mm7 (note: %%mm7 no longer holds 0 afterwards).
 */
#define WRITEBGR24MMX(dst, dstw, index) \
		/* %%mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1 \n\t"		/* B */\
		"movq %%mm5, %%mm6 \n\t"		/* R */\
		"punpcklbw %%mm4, %%mm2 \n\t"		/* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5 \n\t"		/* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1 \n\t"		/* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6 \n\t"		/* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0 \n\t"		/* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3 \n\t"		/* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0 \n\t"		/* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2 \n\t"		/* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1 \n\t"		/* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3 \n\t"		/* 0RGB0RGB 3 */\
\
		"movq %%mm0, %%mm4 \n\t"		/* 0RGB0RGB 0 */\
		"movq %%mm2, %%mm6 \n\t"		/* 0RGB0RGB 1 */\
		"movq %%mm1, %%mm5 \n\t"		/* 0RGB0RGB 2 */\
		"movq %%mm3, %%mm7 \n\t"		/* 0RGB0RGB 3 */\
\
		"psllq $40, %%mm0 \n\t"			/* RGB00000 0 */\
		"psllq $40, %%mm2 \n\t"			/* RGB00000 1 */\
		"psllq $40, %%mm1 \n\t"			/* RGB00000 2 */\
		"psllq $40, %%mm3 \n\t"			/* RGB00000 3 */\
\
		"punpckhdq %%mm4, %%mm0 \n\t"		/* 0RGBRGB0 0 */\
		"punpckhdq %%mm6, %%mm2 \n\t"		/* 0RGBRGB0 1 */\
		"punpckhdq %%mm5, %%mm1 \n\t"		/* 0RGBRGB0 2 */\
		"punpckhdq %%mm7, %%mm3 \n\t"		/* 0RGBRGB0 3 */\
\
		"psrlq $8, %%mm0 \n\t"			/* 00RGBRGB 0 */\
		"movq %%mm2, %%mm6 \n\t"		/* 0RGBRGB0 1 */\
		"psllq $40, %%mm2 \n\t"			/* GB000000 1 */\
		"por %%mm2, %%mm0 \n\t"			/* GBRGBRGB 0 */\
		MOVNTQ(%%mm0, (dst))\
\
		"psrlq $24, %%mm6 \n\t"			/* 0000RGBR 1 */\
		"movq %%mm1, %%mm5 \n\t"		/* 0RGBRGB0 2 */\
		"psllq $24, %%mm1 \n\t"			/* BRGB0000 2 */\
		"por %%mm1, %%mm6 \n\t"			/* BRGBRGBR 1 */\
		MOVNTQ(%%mm6, 8(dst))\
\
		"psrlq $40, %%mm5 \n\t"			/* 000000RG 2 */\
		"psllq $8, %%mm3 \n\t"			/* RGBRGB00 3 */\
		"por %%mm3, %%mm5 \n\t"			/* RGBRGBRG 2 */\
		MOVNTQ(%%mm5, 16(dst))\
\
		"add $24, "#dst" \n\t"\
\
		"add $8, "#index" \n\t"\
		"cmp "#dstw", "#index" \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR24MMX2 - MMX2 (pshufw-based) store of 8 packed 24-bit pixels.
 *
 * Expands to asm string literals for use inside an enclosing asm statement.
 * On entry (per the register note below):
 *   %%mm2 = 8 blue bytes, %%mm4 = 8 green bytes, %%mm5 = 8 red bytes.
 * %%mm7 is documented as 0 on entry but is immediately overwritten with the
 * M24C mask; %%mm0 is loaded with M24A and M24B is used from memory.
 * For each of the three output quadwords the needed words of every channel
 * are replicated with pshufw, the bytes belonging to that channel are kept
 * with one of the M24A/M24B/M24C masks, and the three results are OR-ed
 * together and stored with MOVNTQ at dst, 8(dst) and 16(dst).
 *
 * dst   - register operand holding the output pointer; advanced by 24.
 * dstw  - operand compared against index to terminate the loop.
 * index - register operand holding the pixel counter; advanced by 8.
 *
 * The final "jb 1b" jumps back to a local label "1:" supplied by the code
 * preceding this macro inside the same asm statement.
 * Clobbers %%mm0, %%mm1, %%mm3, %%mm4, %%mm6, %%mm7.
 */
#define WRITEBGR24MMX2(dst, dstw, index) \
		/* %%mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq "MANGLE(M24A)", %%mm0 \n\t"\
		"movq "MANGLE(M24C)", %%mm7 \n\t"\
		"pshufw $0x50, %%mm2, %%mm1 \n\t"	/* B3 B2 B3 B2  B1 B0 B1 B0 */\
		"pshufw $0x50, %%mm4, %%mm3 \n\t"	/* G3 G2 G3 G2  G1 G0 G1 G0 */\
		"pshufw $0x00, %%mm5, %%mm6 \n\t"	/* R1 R0 R1 R0  R1 R0 R1 R0 */\
\
		"pand %%mm0, %%mm1 \n\t"		/*    B2        B1       B0 */\
		"pand %%mm0, %%mm3 \n\t"		/*    G2        G1       G0 */\
		"pand %%mm7, %%mm6 \n\t"		/*       R1        R0       */\
\
		"psllq $8, %%mm3 \n\t"			/* G2        G1       G0    */\
		"por %%mm1, %%mm6 \n\t"\
		"por %%mm3, %%mm6 \n\t"\
		MOVNTQ(%%mm6, (dst))\
\
		"psrlq $8, %%mm4 \n\t"			/* 00 G7 G6 G5  G4 G3 G2 G1 */\
		"pshufw $0xA5, %%mm2, %%mm1 \n\t"	/* B5 B4 B5 B4  B3 B2 B3 B2 */\
		"pshufw $0x55, %%mm4, %%mm3 \n\t"	/* G4 G3 G4 G3  G4 G3 G4 G3 */\
		"pshufw $0xA5, %%mm5, %%mm6 \n\t"	/* R5 R4 R5 R4  R3 R2 R3 R2 */\
\
		"pand "MANGLE(M24B)", %%mm1 \n\t"	/* B5       B4        B3    */\
		"pand %%mm7, %%mm3 \n\t"		/*       G4        G3       */\
		"pand %%mm0, %%mm6 \n\t"		/*    R4        R3       R2 */\
\
		"por %%mm1, %%mm3 \n\t"			/* B5    G4 B4     G3 B3    */\
		"por %%mm3, %%mm6 \n\t"\
		MOVNTQ(%%mm6, 8(dst))\
\
		"pshufw $0xFF, %%mm2, %%mm1 \n\t"	/* B7 B6 B7 B6  B7 B6 B7 B6 */\
		"pshufw $0xFA, %%mm4, %%mm3 \n\t"	/* 00 G7 00 G7  G6 G5 G6 G5 */\
		"pshufw $0xFA, %%mm5, %%mm6 \n\t"	/* R7 R6 R7 R6  R5 R4 R5 R4 */\
\
		"pand %%mm7, %%mm1 \n\t"		/*       B7        B6       */\
		"pand %%mm0, %%mm3 \n\t"		/*    G7        G6       G5 */\
		"pand "MANGLE(M24B)", %%mm6 \n\t"	/* R7       R6        R5    */\
\
		"por %%mm1, %%mm3 \n\t"\
		"por %%mm3, %%mm6 \n\t"\
		MOVNTQ(%%mm6, 16(dst))\
\
		"add $24, "#dst" \n\t"\
\
		"add $8, "#index" \n\t"\
		"cmp "#dstw", "#index" \n\t"\
		" jb 1b \n\t"

/*
 * Select the 24-bit writer for this build: the pshufw-based variant when
 * HAVE_MMX2 is defined, the plain MMX one otherwise.
 * Any earlier definition is dropped first so that re-defining the macro is
 * legal (presumably this template is compiled more than once with different
 * HAVE_* settings -- confirm against the including file).
 */
#undef WRITEBGR24
#ifdef HAVE_MMX2
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
#else
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
#endif

/*
 * REAL_WRITEYUY2 - pack and store 8 pixels (16 bytes) of byte-interleaved
 * output.
 *
 * Expands to asm string literals for use inside an enclosing asm statement.
 * %%mm3 and %%mm4 are each packed from words to unsigned-saturated bytes and
 * byte-interleaved with each other; %%mm1 (low half) and %%mm7 (high half)
 * are packed into 8 bytes in %%mm1.  The %%mm1 bytes are then interleaved
 * with the %%mm3/%%mm4 bytes and the two resulting quadwords are stored with
 * MOVNTQ at dst + 2*index and dst + 2*index + 8.
 * NOTE(review): presumably %%mm1/%%mm7 hold luma and %%mm3/%%mm4 hold U/V,
 * giving Y U Y V byte order -- confirm against the macros that precede this
 * one at the call sites.
 *
 * dst   - register operand holding the base output pointer (not modified).
 * dstw  - operand compared against index to terminate the loop.
 * index - register operand holding the pixel counter; advanced by 8.
 *
 * The final "jb 1b" jumps back to a local label "1:" supplied by the code
 * preceding this macro inside the same asm statement.
 * Clobbers %%mm1, %%mm3, %%mm4, %%mm7.
 */
#define REAL_WRITEYUY2(dst, dstw, index) \
		"packuswb %%mm3, %%mm3 \n\t"\
		"packuswb %%mm4, %%mm4 \n\t"\
		"packuswb %%mm7, %%mm1 \n\t"\
		"punpcklbw %%mm4, %%mm3 \n\t"\
		"movq %%mm1, %%mm7 \n\t"\
		"punpcklbw %%mm3, %%mm1 \n\t"\
		"punpckhbw %%mm3, %%mm7 \n\t"\
\
		/* 2 output bytes per pixel, hence the index*2 addressing */\
		MOVNTQ(%%mm1, (dst, index, 2))\
		MOVNTQ(%%mm7, 8(dst, index, 2))\
\
		"add $8, "#index" \n\t"\
		"cmp "#dstw", "#index" \n\t"\
		" jb 1b \n\t"
/* Extra level of indirection so that macro arguments are expanded before
 * REAL_WRITEYUY2 stringifies them. */
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)


928 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
929 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
930 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
931 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
932 #ifdef HAVE_MMX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
933 if(c->flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
934 if(uDest){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
935 YSCALEYUV2YV12X_ACCURATE( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
936 YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
937 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
938
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
939 YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
940 }else{
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
941 if(uDest){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
942 YSCALEYUV2YV12X( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
943 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
944 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
945
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
946 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
947 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
948 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
949 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
950 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
951 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
952 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
953 #else //HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
954 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
955 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
956 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
957 #endif //!HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
958 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
959 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
960
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
961 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
962 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
963 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
964 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
965 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
966 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
967 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
968 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
969
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
970 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
971 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
972 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
973 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
974 if(uDest != NULL)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
975 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
976 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
977 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
978 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
979 "g" (-chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
980 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
981 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
982
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
983 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
984 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
985 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
986 "g" (-chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
987 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
988 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
989 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
990
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
991 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
992 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
993 :: "r" (lumSrc + dstW), "r" (dest + dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
994 "g" (-dstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
995 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
996 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
997 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
998 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
999 for(i=0; i<dstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1000 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1001 int val= lumSrc[i]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1002
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1003 if(val&256){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1004 if(val<0) val=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1005 else val=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1006 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1007
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1008 dest[i]= val;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1009 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1010
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1011 if(uDest != NULL)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1012 for(i=0; i<chrDstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1013 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1014 int u=chrSrc[i]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1015 int v=chrSrc[i + 2048]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1016
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1017 if((u|v)&256){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1018 if(u<0) u=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1019 else if (u>255) u=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1020 if(v<0) v=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1021 else if (v>255) v=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1022 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1023
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1024 uDest[i]= u;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1025 vDest[i]= v;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1026 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1027 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1028 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1029
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1030
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1031 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1032 * vertical scale YV12 to RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1033 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1034 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1035 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1036 uint8_t *dest, long dstW, long dstY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1037 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1038 long dummy=0;
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1039 #ifdef HAVE_MMX
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1040 if(c->flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1041 switch(c->dstFormat){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1042 case IMGFMT_BGR32:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1043 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1044 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1045 WRITEBGR32(%4, %5, %%REGa)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1046
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1047 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1048 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1049 case IMGFMT_BGR24:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1050 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1051 YSCALEYUV2RGBX
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1052 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1053 "add %4, %%"REG_c" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1054 WRITEBGR24(%%REGc, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1055
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1056
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1057 :: "r" (&c->redDither),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1058 "m" (dummy), "m" (dummy), "m" (dummy),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1059 "r" (dest), "m" (dstW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1060 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1061 );
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1062 return;
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1063 case IMGFMT_BGR15:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1064 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1065 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1066 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1067 #ifdef DITHER1XBPP
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1068 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1069 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1070 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1071 #endif
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1072
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1073 WRITEBGR15(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1074 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1075 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1076 case IMGFMT_BGR16:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1077 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1078 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1079 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1080 #ifdef DITHER1XBPP
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1081 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1082 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1083 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1084 #endif
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1085
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1086 WRITEBGR16(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1087 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1088 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1089 case IMGFMT_YUY2:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1090 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1091 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1092
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1093 "psraw $3, %%mm3 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1094 "psraw $3, %%mm4 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1095 "psraw $3, %%mm1 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1096 "psraw $3, %%mm7 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1097 WRITEYUY2(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1098 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1099 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1100 }
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1101 }else{
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1102 switch(c->dstFormat)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1103 {
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1104 case IMGFMT_BGR32:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1105 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1106 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1107 WRITEBGR32(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1108 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1109 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1110 case IMGFMT_BGR24:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1111 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1112 YSCALEYUV2RGBX
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1113 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1114 "add %4, %%"REG_c" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1115 WRITEBGR24(%%REGc, %5, %%REGa)
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1116
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1117 :: "r" (&c->redDither),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1118 "m" (dummy), "m" (dummy), "m" (dummy),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1119 "r" (dest), "m" (dstW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1120 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1121 );
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1122 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1123 case IMGFMT_BGR15:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1124 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1125 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1126 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1127 #ifdef DITHER1XBPP
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1128 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1129 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1130 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1131 #endif
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1132
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1133 WRITEBGR15(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1134 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1135 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1136 case IMGFMT_BGR16:
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1137 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1138 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1139 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1140 #ifdef DITHER1XBPP
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1141 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1142 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1143 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1144 #endif
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1145
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1146 WRITEBGR16(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1147 YSCALEYUV2PACKEDX_END
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1148 return;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1149 case IMGFMT_YUY2:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1150 YSCALEYUV2PACKEDX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1151 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1152
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1153 "psraw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1154 "psraw $3, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1155 "psraw $3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1156 "psraw $3, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1157 WRITEYUY2(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1158 YSCALEYUV2PACKEDX_END
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1159 return;
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1160 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1161 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1162 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1163 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1164 /* The following list of supported dstFormat values should
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1165 match what's found in the body of altivec_yuv2packedX() */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1166 if(c->dstFormat==IMGFMT_ABGR || c->dstFormat==IMGFMT_BGRA ||
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1167 c->dstFormat==IMGFMT_BGR24 || c->dstFormat==IMGFMT_RGB24 ||
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1168 c->dstFormat==IMGFMT_RGBA || c->dstFormat==IMGFMT_ARGB)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1169 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1170 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1171 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1172 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1173 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1174 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1175 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1176 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1177 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1178
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1179 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1180 * vertical bilinear scale YV12 to RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1181 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1182 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1183 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1184 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1185 int yalpha1=yalpha^4095;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1186 int uvalpha1=uvalpha^4095;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1187 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1188
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1189 #if 0 //isn't used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1190 if(flags&SWS_FULL_CHR_H_INT)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1191 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1192 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1193 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1194 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1195 case IMGFMT_BGR32:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1196 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1197
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1198
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1199 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1200 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1201 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1202
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1203 "movq %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1204 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1205 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1206
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1207 MOVNTQ(%%mm3, (%4, %%REGa, 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1208 MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1209
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1210 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1211 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1212 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1213
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1214
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1215 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1216 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1217 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1218 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1219 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1220 case IMGFMT_BGR24:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1221 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1222
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1223 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1224
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1225 // lsb ... msb
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1226 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1227 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1228
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1229 "movq %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1230 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1231 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1232
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1233 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1234 "psrlq $8, %%mm3 \n\t" // GR0BGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1235 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1236 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1237 "por %%mm2, %%mm3 \n\t" // BGRBGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1238 "movq %%mm1, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1239 "psllq $48, %%mm1 \n\t" // 000000BG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1240 "por %%mm1, %%mm3 \n\t" // BGRBGRBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1241
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1242 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1243 "psrld $16, %%mm2 \n\t" // R000R000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1244 "psrlq $24, %%mm1 \n\t" // 0BGR0000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1245 "por %%mm2, %%mm1 \n\t" // RBGRR000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1246
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1247 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1248 "add %%"REG_a", %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1249
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1250 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1251 //FIXME Alignment
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1252 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1253 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1254 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1255 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1256 "psrlq $32, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1257 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1258 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1259 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1260 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1261 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1262 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1263
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1264 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1265 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1266 : "%"REG_a, "%"REG_b
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1267 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1268 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1269 case IMGFMT_BGR15:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1270 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1271
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1272 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1273 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1274 "paddusb "MANGLE(g5Dither)", %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1275 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1276 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1277 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1278 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1279 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1280 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1281
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1282 "psrlw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1283 "psllw $2, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1284 "psllw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1285 "pand "MANGLE(g15Mask)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1286 "pand "MANGLE(r15Mask)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1287
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1288 "por %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1289 "por %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1290
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1291 MOVNTQ(%%mm0, (%4, %%REGa, 2))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1292
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1293 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1294 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1295 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1296
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1297 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1298 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1299 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1300 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1301 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1302 case IMGFMT_BGR16:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1303 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1304
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1305 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1306 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1307 "paddusb "MANGLE(g6Dither)", %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1308 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1309 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1310 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1311 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1312 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1313 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1314
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1315 "psrlw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1316 "psllw $3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1317 "psllw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1318 "pand "MANGLE(g16Mask)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1319 "pand "MANGLE(r16Mask)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1320
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1321 "por %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1322 "por %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1323
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1324 MOVNTQ(%%mm0, (%4, %%REGa, 2))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1325
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1326 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1327 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1328 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1329
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1330 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1331 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1332 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1333 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1334 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1335 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1336 case IMGFMT_RGB32:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1337 #ifndef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1338 case IMGFMT_BGR32:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1339 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1340 if(dstFormat==IMGFMT_BGR32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1341 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1342 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1343 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1344 dest++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1345 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1346 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1347 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1348 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1349 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1350 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1351 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1352 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1353 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1354 dest+= 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1355 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1356 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1357 else if(dstFormat==IMGFMT_BGR24)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1358 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1359 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1360 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1361 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1362 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1363 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1364 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1365 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1366 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1367 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1368 dest+= 3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1369 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1370 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1371 else if(dstFormat==IMGFMT_BGR16)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1372 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1373 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1374 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1375 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1376 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1377 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1378 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1379
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1380 ((uint16_t*)dest)[i] =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1381 clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1382 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1383 clip_table16r[(Y + yuvtab_3343[V]) >>13];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1384 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1385 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1386 else if(dstFormat==IMGFMT_BGR15)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1387 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1388 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1389 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1390 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1391 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1392 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1393 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1394
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1395 ((uint16_t*)dest)[i] =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1396 clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1397 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1398 clip_table15r[(Y + yuvtab_3343[V]) >>13];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1399 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1400 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1401 }//FULL_UV_IPOL
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1402 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1403 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1404 #endif // if 0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1405 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1406 switch(c->dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1407 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1408 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1409 case IMGFMT_BGR32:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1410 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1411 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1412 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1413 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1414 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1415 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1416 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1417 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1418
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1419 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1420 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1421 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1422 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1423 case IMGFMT_BGR24:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1424 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1425 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1426 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1427 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1428 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1429 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1430 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1431 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1432 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1433 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1434 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1435 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1436 case IMGFMT_BGR15:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1437 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1438 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1439 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1440 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1441 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1442 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1443 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1444 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1445 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1446 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1447 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1448
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1449 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1450 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1451 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1452
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1453 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1454 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1455 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1456 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1457 case IMGFMT_BGR16:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1458 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1459 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1460 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1461 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1462 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1463 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1464 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1465 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1466 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1467 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1468 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1469
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1470 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1471 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1472 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1473 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1474 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1475 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1476 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1477 case IMGFMT_YUY2:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1478 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1479 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1480 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1481 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1482 YSCALEYUV2PACKED(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1483 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1484 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1485 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1486 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1487 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1488 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1489 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1490 default: break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1491 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1492 #endif //HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1493 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1494 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1495
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1496 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1497 * YV12 to RGB without scaling or interpolating
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1498 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1499 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1500 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1501 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1502 const int yalpha1=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1503 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1504
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1505 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1506 const int yalpha= 4096; //FIXME ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1507
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1508 if(flags&SWS_FULL_CHR_H_INT)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1509 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1510 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1511 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1512 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1513
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1514 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1515 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1516 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1517 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1518 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1519 case IMGFMT_BGR32:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1520 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1521 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1522 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1523 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1524 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1525 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1526 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1527 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1528
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1529 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1530 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1531 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1532 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1533 case IMGFMT_BGR24:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1534 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1535 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1536 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1537 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1538 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1539 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1540 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1541 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1542
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1543 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1544 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1545 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1546 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1547 case IMGFMT_BGR15:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1548 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1549 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1550 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1551 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1552 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1553 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1554 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1555 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1556 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1557 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1558 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1559 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1560 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1561 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1562
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1563 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1564 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1565 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1566 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1567 case IMGFMT_BGR16:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1568 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1569 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1570 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1571 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1572 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1573 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1574 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1575 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1576 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1577 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1578 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1579
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1580 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1581 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1582 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1583
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1584 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1585 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1586 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1587 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1588 case IMGFMT_YUY2:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1589 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1590 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1591 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1592 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1593 YSCALEYUV2PACKED1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1594 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1595 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1596 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1597
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1598 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1599 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1600 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1601 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1602 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1603 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1604 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1605 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1606 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1607 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1608 case IMGFMT_BGR32:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1609 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1610 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1611 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1612 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1613 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1614 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1615 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1616 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1617
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1618 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1619 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1620 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1621 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1622 case IMGFMT_BGR24:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1623 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1624 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1625 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1626 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1627 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1628 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1629 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1630 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1631
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1632 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1633 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1634 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1635 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1636 case IMGFMT_BGR15:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1637 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1638 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1639 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1640 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1641 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1642 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1643 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1644 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1645 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1646 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1647 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1648 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1649 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1650 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1651
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1652 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1653 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1654 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1655 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1656 case IMGFMT_BGR16:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1657 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1658 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1659 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1660 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1661 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1662 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1663 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1664 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1665 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1666 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1667 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1668
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1669 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1670 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1671 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1672
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1673 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1674 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1675 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1676 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1677 case IMGFMT_YUY2:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1678 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1679 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1680 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1681 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1682 YSCALEYUV2PACKED1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1683 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1684 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1685 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1686
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1687 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1688 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1689 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1690 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1691 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1692 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1693 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1694 if( uvalpha < 2048 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1695 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1696 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1697 }else{
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1698 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1699 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1700 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1701
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1702 //FIXME yuy2* can read up to 7 samples too much
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1703
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1704 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1705 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1706 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1707 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1708 "movq "MANGLE(bm01010101)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1709 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1710 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1711 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1712 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1713 "pand %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1714 "pand %%mm2, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1715 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1716 "movq %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1717 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1718 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1719 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1720 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1721 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1722 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1723 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1724 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1725 dst[i]= src[2*i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1726 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1727 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1728
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1729 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1730 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1731 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1732 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1733 "movq "MANGLE(bm01010101)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1734 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1735 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1736 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1737 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1738 "movq (%2, %%"REG_a",4), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1739 "movq 8(%2, %%"REG_a",4), %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1740 PAVGB(%%mm2, %%mm0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1741 PAVGB(%%mm3, %%mm1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1742 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1743 "psrlw $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1744 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1745 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1746 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1747 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1748 "packuswb %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1749 "packuswb %%mm1, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1750 "movd %%mm0, (%4, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1751 "movd %%mm1, (%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1752 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1753 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1754 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1755 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1756 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1757 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1758 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1759 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1760 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1761 dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1762 dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1763 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1764 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1765 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1766
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1767 //this is almost identical to the previous, and exists only because RENAME(yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1768 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1769 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1770 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1771 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1772 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1773 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1774 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1775 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1776 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1777 "psrlw $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1778 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1779 "movq %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1780 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1781 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1782 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1783 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1784 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1785 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1786 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1787 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1788 dst[i]= src[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1789 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1790 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1791
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1792 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1793 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1794 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1795 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1796 "movq "MANGLE(bm01010101)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1797 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1798 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1799 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1800 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1801 "movq (%2, %%"REG_a",4), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1802 "movq 8(%2, %%"REG_a",4), %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1803 PAVGB(%%mm2, %%mm0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1804 PAVGB(%%mm3, %%mm1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1805 "pand %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1806 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1807 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1808 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1809 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1810 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1811 "packuswb %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1812 "packuswb %%mm1, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1813 "movd %%mm0, (%4, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1814 "movd %%mm1, (%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1815 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1816 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1817 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1818 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1819 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1820 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1821 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1822 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1823 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1824 dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1825 dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1826 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1827 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1828 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1829
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1830 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1831 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1832 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1833 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1834 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1835 int b= ((uint32_t*)src)[i]&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1836 int g= (((uint32_t*)src)[i]>>8)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1837 int r= (((uint32_t*)src)[i]>>16)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1838
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1839 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1840 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1841 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1842
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1843 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1844 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1845 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1846 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1847 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1848 const int a= ((uint32_t*)src1)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1849 const int e= ((uint32_t*)src1)[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1850 const int c= ((uint32_t*)src2)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1851 const int d= ((uint32_t*)src2)[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1852 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1853 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1854 const int b= l&0x3FF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1855 const int g= h>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1856 const int r= l>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1857
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1858 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1859 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1860 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1861 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1862
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1863 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1864 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1865 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1866 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1867 "mov %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1868 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1869 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1870 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1871 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
1872 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1873 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1874 PREFETCH" 64(%0, %%"REG_d") \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1875 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1876 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1877 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1878 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1879 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1880 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1881 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1882 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1883 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1884 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1885 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1886 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1887 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1888 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1889 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1890 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1891 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1892 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1893 "packssdw %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1894 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1895 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1896 "pmaddwd %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1897 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1898 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1899
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1900 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1901 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1902 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1903 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1904 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1905 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1906 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1907 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1908 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1909 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1910 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1911 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1912 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1913 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1914 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1915 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1916 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1917 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1918 "packssdw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1919 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1920 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1921 "pmaddwd %%mm5, %%mm2 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1922 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1923 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1924 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1925
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1926 "packuswb %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1927 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1928
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1929 "movq %%mm0, (%1, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1930 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1931 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1932 : : "r" (src+width*3), "r" (dst+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1933 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1934 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1935 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1936 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1937 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1938 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1939 int b= src[i*3+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1940 int g= src[i*3+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1941 int r= src[i*3+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1942
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1943 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1944 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1945 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1946 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1947
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1948 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1949 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1950 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1951 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1952 "mov %4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1953 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1954 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1955 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1956 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1957 "add %%"REG_d", %%"REG_d" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
1958 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1959 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1960 PREFETCH" 64(%0, %%"REG_d") \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1961 PREFETCH" 64(%1, %%"REG_d") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1962 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1963 "movq (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1964 "movq (%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1965 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1966 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1967 PAVGB(%%mm1, %%mm0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1968 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1969 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1970 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1971 "psrlq $24, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1972 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1973 PAVGB(%%mm1, %%mm0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1974 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1975 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1976 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1977 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1978 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1979 "movd (%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1980 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1981 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1982 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1983 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1984 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1985 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1986 "paddw %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1987 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1988 "paddw %%mm2, %%mm0 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1989 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1990 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1991 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1992 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1993 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1994 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1995 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1996 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1997 "paddw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1998 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1999 "paddw %%mm4, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2000 "psrlw $2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2001 "psrlw $2, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2002 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2003 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2004 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2005
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2006 "pmaddwd %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2007 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2008 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2009 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2010 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2011 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2012 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2013 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2014 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2015 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2016 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2017 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2018 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2019 "pmaddwd %%mm5, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2020 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2021 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2022
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2023 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2024 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2025 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2026 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2027 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2028 PAVGB(%%mm1, %%mm4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2029 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2030 "movq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2031 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2032 "psrlq $24, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2033 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2034 PAVGB(%%mm1, %%mm4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2035 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2036 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2037 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2038 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2039 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2040 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2041 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2042 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2043 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2044 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2045 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2046 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2047 "paddw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2048 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2049 "paddw %%mm2, %%mm4 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2050 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2051 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2052 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2053 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2054 "punpcklbw %%mm7, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2055 "punpcklbw %%mm7, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2056 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2057 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2058 "paddw %%mm1, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2059 "paddw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2060 "paddw %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2061 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2062 "psrlw $2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2063 "psrlw $2, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2064 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2065 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2066 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2067
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2068 "pmaddwd %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2069 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2070 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2071 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2072 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2073 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2074 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2075 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2076 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2077 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2078 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2079 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2080 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2081 "pmaddwd %%mm5, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2082 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2083 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2084 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2085
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2086 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2087 "punpckldq %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2088 "punpckhdq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2089 "packsswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2090 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2091
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2092 "movd %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2093 "punpckhdq %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2094 "movd %%mm0, (%3, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2095 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2096 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2097 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2098 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2099 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2100 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2101 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2102 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2103 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2104 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2105 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2106 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2107
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2108 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2109 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2110 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2111 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2112 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2113
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2114 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2115 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2116 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2117 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2118 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2119 int d= ((uint16_t*)src)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2120 int b= d&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2121 int g= (d>>5)&0x3F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2122 int r= (d>>11)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2123
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2124 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2125 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2126 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2127
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2128 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2129 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2130 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2131 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2132 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2133 int d0= ((uint32_t*)src1)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2134 int d1= ((uint32_t*)src2)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2135
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2136 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2137 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2138
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2139 int dh2= (dh>>11) + (dh<<21);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2140 int d= dh2 + dl;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2141
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2142 int b= d&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2143 int r= (d>>11)&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2144 int g= d>>21;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2145 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2146 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2147 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2148 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2149
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2150 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2151 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2152 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2153 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2154 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2155 int d= ((uint16_t*)src)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2156 int b= d&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2157 int g= (d>>5)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2158 int r= (d>>10)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2159
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2160 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2161 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2162 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2163
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2164 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2165 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2166 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2167 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2168 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2169 int d0= ((uint32_t*)src1)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2170 int d1= ((uint32_t*)src2)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2171
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2172 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2173 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2174
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2175 int dh2= (dh>>11) + (dh<<21);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2176 int d= dh2 + dl;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2177
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2178 int b= d&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2179 int r= (d>>10)&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2180 int g= d>>21;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2181 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2182 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2183 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2184 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2185
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2186
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2187 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2188 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2189 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2190 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2191 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2192 int r= ((uint32_t*)src)[i]&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2193 int g= (((uint32_t*)src)[i]>>8)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2194 int b= (((uint32_t*)src)[i]>>16)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2195
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2196 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2197 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2198 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2199
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2200 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2201 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2202 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2203 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2204 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2205 const int a= ((uint32_t*)src1)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2206 const int e= ((uint32_t*)src1)[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2207 const int c= ((uint32_t*)src2)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2208 const int d= ((uint32_t*)src2)[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2209 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2210 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2211 const int r= l&0x3FF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2212 const int g= h>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2213 const int b= l>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2214
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2215 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2216 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2217 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2218 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2219
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2220 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2221 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2222 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2223 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2224 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2225 int r= src[i*3+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2226 int g= src[i*3+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2227 int b= src[i*3+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2228
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2229 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2230 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2231 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2232
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2233 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2234 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2235 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2236 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2237 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2238 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2239 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2240 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2241
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2242 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2243 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2244 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2245 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2246
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2247
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2248 // Bilinear / Bicubic scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2249 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2250 int16_t *filter, int16_t *filterPos, long filterSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2251 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2252 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2253 assert(filterSize % 4 == 0 && filterSize>0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2254 if(filterSize==4) // allways true for upscaling, sometimes for down too
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2255 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2256 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2257 filter-= counter*2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2258 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2259 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2260 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2261 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2262 "push %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2263 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2264 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2265 "movq "MANGLE(w02)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2266 "push %%"REG_BP" \n\t" // we use 7 regs here ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2267 "mov %%"REG_a", %%"REG_BP" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2268 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2269 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2270 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2271 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2272 "movq (%1, %%"REG_BP", 4), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2273 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2274 "movd (%3, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2275 "movd (%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2276 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2277 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2278 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2279 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2280 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2281 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2282 "packssdw %%mm3, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2283 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2284 "packssdw %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2285 "movd %%mm0, (%4, %%"REG_BP") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2286 "add $4, %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2287 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2288
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2289 "pop %%"REG_BP" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2290 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2291 "pop %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2292 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2293 : "+a" (counter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2294 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2295 #if !defined(PIC)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2296 : "%"REG_b
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2297 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2298 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2299 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2300 else if(filterSize==8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2301 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2302 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2303 filter-= counter*4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2304 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2305 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2306 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2307 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2308 "push %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2309 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2310 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2311 "movq "MANGLE(w02)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2312 "push %%"REG_BP" \n\t" // we use 7 regs here ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2313 "mov %%"REG_a", %%"REG_BP" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2314 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2315 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2316 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2317 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2318 "movq (%1, %%"REG_BP", 8), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2319 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2320 "movd (%3, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2321 "movd (%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2322 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2323 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2324 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2325 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2326
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2327 "movq 8(%1, %%"REG_BP", 8), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2328 "movq 24(%1, %%"REG_BP", 8), %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2329 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2330 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2331 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2332 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2333 "pmaddwd %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2334 "pmaddwd %%mm2, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2335 "paddd %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2336 "paddd %%mm5, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2337
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2338 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2339 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2340 "packssdw %%mm3, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2341 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2342 "packssdw %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2343 "movd %%mm0, (%4, %%"REG_BP") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2344 "add $4, %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2345 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2346
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2347 "pop %%"REG_BP" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2348 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2349 "pop %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2350 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2351 : "+a" (counter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2352 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2353 #if !defined(PIC)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2354 : "%"REG_b
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2355 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2356 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2357 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2358 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2359 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2360 uint8_t *offset = src+filterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2361 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2362 // filter-= counter*filterSize/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2363 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2364 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2365 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2366 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2367 "movq "MANGLE(w02)", %%mm6 \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2368 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2369 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2370 "mov %2, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2371 "movzwl (%%"REG_c", %0), %%eax \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2372 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2373 "mov %5, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2374 "pxor %%mm4, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2375 "pxor %%mm5, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2376 "2: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2377 "movq (%1), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2378 "movq (%1, %6), %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2379 "movd (%%"REG_c", %%"REG_a"), %%mm0\n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2380 "movd (%%"REG_c", %%"REG_d"), %%mm2\n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2381 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2382 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2383 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2384 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2385 "paddd %%mm3, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2386 "paddd %%mm0, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2387 "add $8, %1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2388 "add $4, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2389 "cmp %4, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2390 " jb 2b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2391 "add %6, %1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2392 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2393 "psrad $8, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2394 "packssdw %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2395 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2396 "packssdw %%mm4, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2397 "mov %3, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2398 "movd %%mm4, (%%"REG_a", %0) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2399 "add $4, %0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2400 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2401
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2402 : "+r" (counter), "+r" (filter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2403 : "m" (filterPos), "m" (dst), "m"(offset),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2404 "m" (src), "r" (filterSize*2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2405 : "%"REG_a, "%"REG_c, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2406 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2407 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2408 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2409 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2410 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2411 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2412 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2413 for(i=0; i<dstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2414 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2415 int j;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2416 int srcPos= filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2417 int val=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2418 // printf("filterPos: %d\n", filterPos[i]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2419 for(j=0; j<filterSize; j++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2420 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2421 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2422 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2423 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2424 // filter += hFilterSize;
19181
e40cf0305d4e Replace MIN() and MAX() with FFMIN() and FFMAX()
lucabe
parents: 19173
diff changeset
2425 dst[i] = FFMIN(FFMAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ...
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2426 // dst[i] = val>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2427 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2428 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2429 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2430 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2431 // *** horizontal scale Y line to temp buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2432 static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2433 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2434 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2435 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2436 int32_t *mmx2FilterPos)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2437 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2438 if(srcFormat==IMGFMT_YUY2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2439 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2440 RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2441 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2442 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2443 else if(srcFormat==IMGFMT_UYVY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2444 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2445 RENAME(uyvyToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2446 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2447 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2448 else if(srcFormat==IMGFMT_BGR32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2449 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2450 RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2451 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2452 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2453 else if(srcFormat==IMGFMT_BGR24)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2454 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2455 RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2456 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2457 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2458 else if(srcFormat==IMGFMT_BGR16)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2459 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2460 RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2461 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2462 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2463 else if(srcFormat==IMGFMT_BGR15)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2464 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2465 RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2466 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2467 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2468 else if(srcFormat==IMGFMT_RGB32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2469 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2470 RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2471 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2472 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2473 else if(srcFormat==IMGFMT_RGB24)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2474 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2475 RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2476 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2477 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2478
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2479 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2480 // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2481 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2482 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2483 if(!(flags&SWS_FAST_BILINEAR))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2484 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2485 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2486 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2487 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2488 else // Fast Bilinear upscale / crap downscale
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2489 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2490 #if defined(ARCH_X86) || defined(ARCH_X86_64)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2491 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2492 int i;
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2493 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2494 uint64_t ebxsave __attribute__((aligned(8)));
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2495 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2496 if(canMMX2BeUsed)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2497 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2498 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2499 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2500 "mov %%"REG_b", %5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2501 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2502 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2503 "mov %0, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2504 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2505 "mov %2, %%"REG_d" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2506 "mov %3, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2507 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2508 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2509 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2510 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2511
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2512 #ifdef ARCH_X86_64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2513
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2514 #define FUNNY_Y_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2515 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2516 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2517 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2518 "add %%"REG_S", %%"REG_c" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2519 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2520 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2521
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2522 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2523
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2524 #define FUNNY_Y_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2525 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2526 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2527 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2528 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2529 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2530
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2531 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2532
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2533 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2534 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2535 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2536 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2537 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2538 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2539 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2540 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2541
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2542 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2543 "mov %5, %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2544 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2545 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2546 "m" (funnyYCode)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2547 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2548 ,"m" (ebxsave)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2549 #endif
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2550 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2551 #if !defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2552 ,"%"REG_b
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2553 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2554 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2555 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2556 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2557 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2558 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2559 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2560 long xInc_shr16 = xInc >> 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2561 uint16_t xInc_mask = xInc & 0xffff;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2562 //NO MMX just normal asm ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2563 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2564 "xor %%"REG_a", %%"REG_a" \n\t" // i
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2565 "xor %%"REG_d", %%"REG_d" \n\t" // xx
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2566 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2567 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2568 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2569 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2570 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2571 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2572 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2573 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2574 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2575 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2576 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2577 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2578 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2579 "adc %3, %%"REG_d" \n\t" //xx += (xInc>>16) + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2580
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2581 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2582 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2583 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2584 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2585 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2586 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2587 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2588 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2589 "movw %%si, 2(%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2590 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2591 "adc %3, %%"REG_d" \n\t" //xx += (xInc>>16) + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2592
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2593
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2594 "add $2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2595 "cmp %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2596 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2597
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2598
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2599 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2600 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2601 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2602 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2603 } //if MMX2 can't be used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2604 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2605 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2606 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2607 unsigned int xpos=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2608 for(i=0;i<dstWidth;i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2609 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2610 register unsigned int xx=xpos>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2611 register unsigned int xalpha=(xpos&0xFFFF)>>9;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2612 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2613 xpos+=xInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2614 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2615 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2616 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2617 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2618
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2619 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2620 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2621 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2622 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2623 int32_t *mmx2FilterPos)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2624 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2625 if(srcFormat==IMGFMT_YUY2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2626 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2627 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2628 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2629 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2630 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2631 else if(srcFormat==IMGFMT_UYVY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2632 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2633 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2634 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2635 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2636 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2637 else if(srcFormat==IMGFMT_BGR32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2638 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2639 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2640 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2641 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2642 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2643 else if(srcFormat==IMGFMT_BGR24)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2644 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2645 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2646 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2647 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2648 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2649 else if(srcFormat==IMGFMT_BGR16)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2650 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2651 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2652 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2653 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2654 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2655 else if(srcFormat==IMGFMT_BGR15)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2656 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2657 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2658 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2659 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2660 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2661 else if(srcFormat==IMGFMT_RGB32)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2662 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2663 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2664 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2665 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2666 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2667 else if(srcFormat==IMGFMT_RGB24)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2668 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2669 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2670 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2671 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2672 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2673 else if(isGray(srcFormat))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2674 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2675 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2676 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2677
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2678 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2679 // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2680 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2681 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2682 if(!(flags&SWS_FAST_BILINEAR))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2683 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2684 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2685 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2686 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2687 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2688 else // Fast Bilinear upscale / crap downscale
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2689 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2690 #if defined(ARCH_X86) || defined(ARCH_X86_64)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2691 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2692 int i;
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2693 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2694 uint64_t ebxsave __attribute__((aligned(8)));
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2695 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2696 if(canMMX2BeUsed)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2697 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2698 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2699 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2700 "mov %%"REG_b", %6 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2701 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2702 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2703 "mov %0, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2704 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2705 "mov %2, %%"REG_d" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2706 "mov %3, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2707 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2708 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2709 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2710 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2711
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2712 #ifdef ARCH_X86_64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2713
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2714 #define FUNNY_UV_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2715 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2716 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2717 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2718 "add %%"REG_S", %%"REG_c" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2719 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2720 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2721
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2722 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2723
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2724 #define FUNNY_UV_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2725 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2726 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2727 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2728 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2729 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2730
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2731 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2732
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2733 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2734 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2735 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2736 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2737 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2738 "mov %5, %%"REG_c" \n\t" // src
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2739 "mov %1, %%"REG_D" \n\t" // buf1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2740 "add $4096, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2741 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2742 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2743 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2744
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2745 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2746 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2747 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2748 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2749
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2750 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2751 "mov %6, %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2752 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2753 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2754 "m" (funnyUVCode), "m" (src2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2755 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2756 ,"m" (ebxsave)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2757 #endif
19400
0310c3310360 Fix compilation with -no-PIC and without -fomit-frame-pointer (used by
uau
parents: 19396
diff changeset
2758 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2759 #if !defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2760 ,"%"REG_b
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2761 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2762 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2763 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2764 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2765 // printf("%d %d %d\n", dstWidth, i, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2766 dst[i] = src1[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2767 dst[i+2048] = src2[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2768 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2769 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2770 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2771 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2772 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2773 long xInc_shr16 = (long) (xInc >> 16);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2774 uint16_t xInc_mask = xInc & 0xffff;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2775 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2776 "xor %%"REG_a", %%"REG_a" \n\t" // i
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2777 "xor %%"REG_d", %%"REG_d" \n\t" // xx
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2778 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2779 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2780 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2781 "mov %0, %%"REG_S" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2782 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2783 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2784 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2785 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2786 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2787 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2788 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2789 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2790 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2791
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2792 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2793 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2794 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2795 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2796 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2797 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2798 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2799 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2800 "movw %%si, 4096(%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2801
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2802 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2803 "adc %3, %%"REG_d" \n\t" //xx += (xInc>>16) + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2804 "add $1, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2805 "cmp %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2806 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2807
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2808 /* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2809 which is needed to support GCC-4.0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2810 #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2811 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2812 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2813 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2814 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2815 "r" (src2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2816 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2817 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2818 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2819 } //if MMX2 can't be used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2820 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2821 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2822 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2823 unsigned int xpos=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2824 for(i=0;i<dstWidth;i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2825 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2826 register unsigned int xx=xpos>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2827 register unsigned int xalpha=(xpos&0xFFFF)>>9;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2828 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2829 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2830 /* slower
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2831 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2832 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2833 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2834 xpos+=xInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2835 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2836 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2837 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2838 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2839
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2840 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2841 int srcSliceH, uint8_t* dst[], int dstStride[]){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2842
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2843 /* load a few things into local vars to make the code more readable? and faster */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2844 const int srcW= c->srcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2845 const int dstW= c->dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2846 const int dstH= c->dstH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2847 const int chrDstW= c->chrDstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2848 const int chrSrcW= c->chrSrcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2849 const int lumXInc= c->lumXInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2850 const int chrXInc= c->chrXInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2851 const int dstFormat= c->dstFormat;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2852 const int srcFormat= c->srcFormat;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2853 const int flags= c->flags;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2854 const int canMMX2BeUsed= c->canMMX2BeUsed;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2855 int16_t *vLumFilterPos= c->vLumFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2856 int16_t *vChrFilterPos= c->vChrFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2857 int16_t *hLumFilterPos= c->hLumFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2858 int16_t *hChrFilterPos= c->hChrFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2859 int16_t *vLumFilter= c->vLumFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2860 int16_t *vChrFilter= c->vChrFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2861 int16_t *hLumFilter= c->hLumFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2862 int16_t *hChrFilter= c->hChrFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2863 int32_t *lumMmxFilter= c->lumMmxFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2864 int32_t *chrMmxFilter= c->chrMmxFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2865 const int vLumFilterSize= c->vLumFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2866 const int vChrFilterSize= c->vChrFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2867 const int hLumFilterSize= c->hLumFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2868 const int hChrFilterSize= c->hChrFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2869 int16_t **lumPixBuf= c->lumPixBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2870 int16_t **chrPixBuf= c->chrPixBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2871 const int vLumBufSize= c->vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2872 const int vChrBufSize= c->vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2873 uint8_t *funnyYCode= c->funnyYCode;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2874 uint8_t *funnyUVCode= c->funnyUVCode;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2875 uint8_t *formatConvBuffer= c->formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2876 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2877 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2878 int lastDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2879
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2880 /* vars which will change and which we need to store back in the context */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2881 int dstY= c->dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2882 int lumBufIndex= c->lumBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2883 int chrBufIndex= c->chrBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2884 int lastInLumBuf= c->lastInLumBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2885 int lastInChrBuf= c->lastInChrBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2886
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2887 if(isPacked(c->srcFormat)){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2888 src[0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2889 src[1]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2890 src[2]= src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2891 srcStride[0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2892 srcStride[1]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2893 srcStride[2]= srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2894 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2895 srcStride[1]<<= c->vChrDrop;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2896 srcStride[2]<<= c->vChrDrop;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2897
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2898 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2899 // (int)dst[0], (int)dst[1], (int)dst[2]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2900
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2901 #if 0 //self test FIXME move to a vfilter or something
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2902 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2903 static volatile int i=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2904 i++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2905 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2906 selfTest(src, srcStride, c->srcW, c->srcH);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2907 i--;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2908 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2909 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2910
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2911 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2912 //dstStride[0],dstStride[1],dstStride[2]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2913
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2914 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2915 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2916 static int firstTime=1; //FIXME move this into the context perhaps
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2917 if(flags & SWS_PRINT_INFO && firstTime)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2918 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2919 MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2920 "SwScaler: ->cannot do aligned memory acesses anymore\n");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2921 firstTime=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2922 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2923 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2924
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2925 /* Note the user might start scaling the picture in the middle so this will not get executed
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2926 this is not really intended but works currently, so people might do it */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2927 if(srcSliceY ==0){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2928 lumBufIndex=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2929 chrBufIndex=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2930 dstY=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2931 lastInLumBuf= -1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2932 lastInChrBuf= -1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2933 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2934
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2935 lastDstY= dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2936
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2937 for(;dstY < dstH; dstY++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2938 unsigned char *dest =dst[0]+dstStride[0]*dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2939 const int chrDstY= dstY>>c->chrDstVSubSample;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2940 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2941 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2942
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2943 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2944 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2945 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2946 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2947
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2948 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2949 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2950 //handle holes (FAST_BILINEAR & weird filters)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2951 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2952 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2953 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2954 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2955 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2956
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2957 // Do we have enough lines in this slice to output the dstY line
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2958 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2959 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2960 //Do horizontal scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2961 while(lastInLumBuf < lastLumSrcY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2962 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2963 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2964 lumBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2965 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2966 ASSERT(lumBufIndex < 2*vLumBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2967 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2968 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2969 // printf("%d %d\n", lumBufIndex, vLumBufSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2970 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2971 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2972 funnyYCode, c->srcFormat, formatConvBuffer,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2973 c->lumMmx2Filter, c->lumMmx2FilterPos);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2974 lastInLumBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2975 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2976 while(lastInChrBuf < lastChrSrcY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2977 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2978 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2979 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2980 chrBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2981 ASSERT(chrBufIndex < 2*vChrBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2982 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2983 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2984 //FIXME replace parameters through context struct (some at least)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2985
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2986 if(!(isGray(srcFormat) || isGray(dstFormat)))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2987 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2988 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2989 funnyUVCode, c->srcFormat, formatConvBuffer,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2990 c->chrMmx2Filter, c->chrMmx2FilterPos);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2991 lastInChrBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2992 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2993 //wrap buf index around to stay inside the ring buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2994 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2995 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2996 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2997 else // not enough lines left in this slice -> load the rest in the buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2998 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2999 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3000 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3001 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3002 vChrBufSize, vLumBufSize);*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3003
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3004 //Do horizontal scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3005 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3006 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3007 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3008 lumBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3009 ASSERT(lumBufIndex < 2*vLumBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3010 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3011 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3012 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3013 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3014 funnyYCode, c->srcFormat, formatConvBuffer,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3015 c->lumMmx2Filter, c->lumMmx2FilterPos);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3016 lastInLumBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3017 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3018 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3019 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3020 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3021 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3022 chrBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3023 ASSERT(chrBufIndex < 2*vChrBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3024 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3025 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3026
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3027 if(!(isGray(srcFormat) || isGray(dstFormat)))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3028 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3029 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3030 funnyUVCode, c->srcFormat, formatConvBuffer,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3031 c->chrMmx2Filter, c->chrMmx2FilterPos);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3032 lastInChrBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3033 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3034 //wrap buf index around to stay inside the ring buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3035 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3036 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3037 break; //we can't output a dstY line so let's try with the next slice
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3038 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3039
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3040 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3041 b5Dither= dither8[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3042 g6Dither= dither4[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3043 g5Dither= dither8[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3044 r5Dither= dither8[(dstY+1)&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3045 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3046 if(dstY < dstH-2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3047 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3048 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3049 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3050 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3051 int i;
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3052 if(flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3053 for(i=0; i<vLumFilterSize; i+=2){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3054 lumMmxFilter[2*i+0]= lumSrcPtr[i ];
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3055 lumMmxFilter[2*i+1]= lumSrcPtr[i+(vLumFilterSize>1)];
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3056 lumMmxFilter[2*i+2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3057 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3058 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3059 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3060 for(i=0; i<vChrFilterSize; i+=2){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3061 chrMmxFilter[2*i+0]= chrSrcPtr[i ];
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3062 chrMmxFilter[2*i+1]= chrSrcPtr[i+(vChrFilterSize>1)];
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3063 chrMmxFilter[2*i+2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3064 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3065 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3066 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3067 }else{
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3068 for(i=0; i<vLumFilterSize; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3069 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3070 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3071 lumMmxFilter[4*i+2]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3072 lumMmxFilter[4*i+3]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3073 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3074 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3075 for(i=0; i<vChrFilterSize; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3076 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3077 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3078 chrMmxFilter[4*i+2]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3079 chrMmxFilter[4*i+3]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3080 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3081 }
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3082 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3083 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3084 if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3085 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3086 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3087 RENAME(yuv2nv12X)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3088 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3089 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3090 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3091 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3092 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3093 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3094 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3095 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3096 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3097 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3098 int16_t *lumBuf = lumPixBuf[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3099 int16_t *chrBuf= chrPixBuf[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3100 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3101 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3102 else //General YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3103 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3104 RENAME(yuv2yuvX)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3105 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3106 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3107 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3108 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3109 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3110 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3111 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3112 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3113 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3114 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3115 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3116 int chrAlpha= vChrFilter[2*dstY+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3117 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3118 dest, dstW, chrAlpha, dstFormat, flags, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3119 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3120 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3121 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3122 int lumAlpha= vLumFilter[2*dstY+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3123 int chrAlpha= vChrFilter[2*dstY+1];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3124 lumMmxFilter[2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3125 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3126 chrMmxFilter[2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3127 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3128 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3129 dest, dstW, lumAlpha, chrAlpha, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3130 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3131 else //General RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3132 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3133 RENAME(yuv2packedX)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3134 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3135 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3136 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3137 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3138 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3139 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3140 else // hmm looks like we can't use MMX here without overwriting this array's tail
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3141 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3142 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3143 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3144 if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3145 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3146 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3147 yuv2nv12XinC(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3148 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3149 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3150 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3151 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3152 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3153 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3154 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3155 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3156 yuv2yuvXinC(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3157 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3158 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3159 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3160 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3161 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3162 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3163 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3164 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3165 yuv2packedXinC(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3166 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3167 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3168 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3169 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3170 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3171 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3172
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3173 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3174 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3175 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3176 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3177 /* store changed local vars back in the context */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3178 c->dstY= dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3179 c->lumBufIndex= lumBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3180 c->chrBufIndex= chrBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3181 c->lastInLumBuf= lastInLumBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3182 c->lastInChrBuf= lastInChrBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3183
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3184 return dstY - lastDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3185 }