annotate libswscale/swscale_template.c @ 22218:ff7aa2aecb9d

pal8 input this is not optimized or anything (that is easy to add just use the existing unscaled converters if possible ...)
author michael
date Fri, 16 Feb 2007 21:17:15 +0000
parents a14ddab18acd
children 1b1286436771
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1 /*
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
3 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
4 * This file is part of FFmpeg.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
5 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or modify
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
7 * it under the terms of the GNU General Public License as published by
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
8 * the Free Software Foundation; either version 2 of the License, or
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
9 * (at your option) any later version.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
10 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
14 * GNU General Public License for more details.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
15 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
16 * You should have received a copy of the GNU General Public License
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
17 * along with FFmpeg; if not, write to the Free Software
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
19 *
21029
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20946
diff changeset
20 * the C code (not assembly, mmx, ...) of this file can be used
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20946
diff changeset
21 * under the LGPL license too
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
22 */
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
23
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
24 #undef REAL_MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
25 #undef MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
26 #undef PAVGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
27 #undef PREFETCH
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
28 #undef PREFETCHW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
29 #undef EMMS
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
30 #undef SFENCE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
31
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
32 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
33 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
34 #define EMMS "femms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
35 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
36 #define EMMS "emms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
37 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
38
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
39 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
40 #define PREFETCH "prefetch"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
41 #define PREFETCHW "prefetchw"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
42 #elif defined ( HAVE_MMX2 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
43 #define PREFETCH "prefetchnta"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
44 #define PREFETCHW "prefetcht0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
45 #else
20724
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20589
diff changeset
46 #define PREFETCH " # nop"
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20589
diff changeset
47 #define PREFETCHW " # nop"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
48 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
49
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
50 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
51 #define SFENCE "sfence"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
52 #else
20724
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20589
diff changeset
53 #define SFENCE " # nop"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
54 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
55
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
56 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
57 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
58 #elif defined (HAVE_3DNOW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
59 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
60 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
61
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
62 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
63 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
64 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
65 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
66 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
67 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
68
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
69 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
70 #include "swscale_altivec_template.c"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
71 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
72
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
73 #define YSCALEYUV2YV12X(x, offset, dest, width) \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
74 asm volatile(\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
75 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
76 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
77 "movq %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
78 "lea " offset "(%0), %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
79 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
80 ASMALIGN(4) /* FIXME Unroll? */\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
81 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
82 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
83 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
84 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
85 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
86 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
87 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
88 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
89 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
90 "paddw %%mm2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
91 "paddw %%mm5, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
92 " jnz 1b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
93 "psraw $3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
94 "psraw $3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
95 "packuswb %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
96 MOVNTQ(%%mm3, (%1, %%REGa))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
97 "add $8, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
98 "cmp %2, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
99 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
100 "movq %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
101 "lea " offset "(%0), %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
102 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
103 "jb 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
104 :: "r" (&c->redDither),\
21325
963e85e82154 Change "p" asm constraints to "g", since "p" was a no longer necessary hack to
reimar
parents: 21029
diff changeset
105 "r" (dest), "g" (width)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
106 : "%"REG_a, "%"REG_d, "%"REG_S\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
107 );
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
108
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
109 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
110 asm volatile(\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
111 "lea " offset "(%0), %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
112 "xor %%"REG_a", %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
113 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
114 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
115 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
116 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
117 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
118 ASMALIGN(4) \
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
119 "1: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
120 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
121 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
122 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
123 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
124 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
125 "punpcklwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
126 "punpckhwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
127 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
128 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
129 "pmaddwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
130 "paddd %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
131 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
132 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
133 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
134 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
135 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
136 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
137 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
138 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
139 "pmaddwd %%mm1, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
140 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
141 "paddd %%mm2, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
142 "paddd %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
143 " jnz 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
144 "psrad $16, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
145 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
146 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
147 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
148 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
149 "packssdw %%mm5, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
150 "packssdw %%mm7, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
151 "paddw %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
152 "paddw %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
153 "psraw $3, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
154 "psraw $3, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
155 "packuswb %%mm6, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
156 MOVNTQ(%%mm4, (%1, %%REGa))\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
157 "add $8, %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
158 "cmp %2, %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
159 "lea " offset "(%0), %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
160 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
161 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
162 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
163 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
164 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
165 "jb 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
166 :: "r" (&c->redDither),\
21325
963e85e82154 Change "p" asm constraints to "g", since "p" was a no longer necessary hack to
reimar
parents: 21029
diff changeset
167 "r" (dest), "g" (width)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
168 : "%"REG_a, "%"REG_d, "%"REG_S\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
169 );
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
170
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
171 #define YSCALEYUV2YV121 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
172 "mov %2, %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
173 ASMALIGN(4) /* FIXME Unroll? */\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
174 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
175 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
176 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
177 "psraw $7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
178 "psraw $7, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
179 "packuswb %%mm1, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
180 MOVNTQ(%%mm0, (%1, %%REGa))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
181 "add $8, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
182 "jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
183
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
184 /*
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
185 :: "m" (-lumFilterSize), "m" (-chrFilterSize),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
186 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
187 "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
188 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
189 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
190 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
191 #define YSCALEYUV2PACKEDX \
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
192 asm volatile(\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
193 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
194 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
195 "nop \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
196 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
197 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
198 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
199 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
200 "movq %%mm3, %%mm4 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
201 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
202 "2: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
203 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
204 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
205 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
206 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
207 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
208 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
209 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
210 "paddw %%mm2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
211 "paddw %%mm5, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
212 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
213 " jnz 2b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
214 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
215 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
216 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
217 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
218 "movq %%mm1, %%mm7 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
219 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
220 "2: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
221 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
222 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
223 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
224 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
225 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
226 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
227 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
228 "paddw %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
229 "paddw %%mm5, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
230 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
231 " jnz 2b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
232
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
233 #define YSCALEYUV2PACKEDX_END\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
234 :: "r" (&c->redDither), \
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
235 "m" (dummy), "m" (dummy), "m" (dummy),\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
236 "r" (dest), "m" (dstW)\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
237 : "%"REG_a, "%"REG_d, "%"REG_S\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
238 );
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
239
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/*
 * YSCALEYUV2PACKEDX_ACCURATE: opens an "asm volatile(" statement and emits a
 * vertical filter that accumulates in 32 bits. The statement is left open;
 * the code following the macro use must supply the rest of the asm body and
 * its operand lists.
 *
 * %0 is the base register for CHR_MMX_FILTER_OFFSET, LUM_MMX_FILTER_OFFSET,
 * VROUNDER_OFFSET, U_TEMP and V_TEMP. REG_a is zeroed once before label "1:"
 * and is used as the sample index; nothing in this macro advances it or
 * jumps back to "1:" (presumably the code appended after it does).
 *
 * Chroma pass (first "2:" loop): REG_d walks the list at
 * CHR_MMX_FILTER_OFFSET(%0). Each iteration reads two source pointers (byte
 * offsets 0 and 4 from REG_d) and one coefficient qword (offset 8),
 * interleaves the words of the two source lines and accumulates the pmaddwd
 * products: U into mm4 (low) / mm5 (high), V (4096 bytes after U) into
 * mm6 (low) / mm7 (high). REG_d then advances by 16 and the loop ends when
 * the next source pointer is zero; the MMX instructions between "test" and
 * "jnz" do not modify the flags. The sums are shifted right by 16, packed
 * to signed words, VROUNDER is added, and the results are stored to
 * U_TEMP(%0) and V_TEMP(%0).
 *
 * Luma pass (second "2:" loop): same scheme over LUM_MMX_FILTER_OFFSET(%0)
 * for eight samples at (REG_S, REG_a, 2) and 8 bytes further.
 *
 * On exit: mm1 = first four luma words (Y1), mm7 = next four luma words (Y2),
 * mm3 = U_TEMP, mm4 = V_TEMP.
 *
 * NOTE(review): the second source pointer is read at byte offset 4 with a
 * REG_S-sized load; the filter-list layout is not visible here - confirm it
 * matches on 64-bit builds.
 */
240 #define YSCALEYUV2PACKEDX_ACCURATE \
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
241 asm volatile(\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
242 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
243 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
244 "nop \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
245 "1: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
246 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
247 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
248 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
249 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
250 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
251 "pxor %%mm7, %%mm7 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
252 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
253 "2: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
254 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
255 "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
256 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
257 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
258 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
259 "punpcklwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
260 "punpckhwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
261 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
262 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
263 "pmaddwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
264 "paddd %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
265 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
266 "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
267 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
268 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
269 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
270 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
271 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
272 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
273 "pmaddwd %%mm1, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
274 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
275 "paddd %%mm2, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
276 "paddd %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
277 " jnz 2b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
278 "psrad $16, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
279 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
280 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
281 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
282 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
283 "packssdw %%mm5, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
284 "packssdw %%mm7, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
285 "paddw %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
286 "paddw %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
287 "movq %%mm4, "U_TEMP"(%0) \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
288 "movq %%mm6, "V_TEMP"(%0) \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
289 \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
290 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
291 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
292 "pxor %%mm1, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
293 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
294 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
295 "pxor %%mm6, %%mm6 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
296 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
297 "2: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
298 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
299 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
300 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
301 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
302 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
303 "punpcklwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
304 "punpckhwd %%mm4, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
305 "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
306 "pmaddwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
307 "pmaddwd %%mm4, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
308 "paddd %%mm0, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
309 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
310 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
311 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
312 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
313 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
314 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
315 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
316 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
317 "pmaddwd %%mm4, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
318 "pmaddwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
319 "paddd %%mm2, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
320 "paddd %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
321 " jnz 2b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
322 "psrad $16, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
323 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
324 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
325 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
326 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
327 "packssdw %%mm5, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
328 "packssdw %%mm6, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
329 "paddw %%mm0, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
330 "paddw %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
331 "movq "U_TEMP"(%0), %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
332 "movq "V_TEMP"(%0), %%mm4 \n\t"\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
333
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/*
 * YSCALEYUV2RGBX: asm fragment converting eight pixels from YUV words to
 * packed R, G and B bytes.
 * Expected on entry: mm1 = Y1 (luma words for the first four pixels),
 * mm7 = Y2 (luma words for the next four), mm3 = four U words,
 * mm4 = four V words; %0 is the base register for the U/V/Y offsets and the
 * UG/VG/UB/VR/Y coefficients. Each chroma word is duplicated with
 * punpck{l,h}wd so it is shared by two neighbouring pixels.
 * On exit: mm2 = eight B bytes, mm5 = eight R bytes, mm4 = eight G bytes
 * (all packed with unsigned saturation) and mm7 = 0.
 * mm0, mm3 and mm6 are used as scratch.
 */
334 #define YSCALEYUV2RGBX \
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
335 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
336 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
337 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
338 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
339 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
340 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
341 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
342 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
343 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
344 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
345 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
346 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
347 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
348 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
349 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
350 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
351 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
352 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
353 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
354 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
355 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
356 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
357 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
358 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
359 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
360 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
361 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
362 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
363 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
364 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
365 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
366 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
367 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
368 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
369 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Compiled out by "#if 0": FULL_YSCALEYUV2RGB is never defined in a build.
 * As written, the fragment blends two luma lines (%0 = buf0, %1 = buf1) and
 * two chroma lines (%2 = uvbuf0, %3 = uvbuf1, with V 4096 bytes after U)
 * using the weights in %6 (yalpha1) and %7 (uvalpha1), each broadcast to four
 * words, and then converts four pixels per pass of label "1:" to RGB with the
 * MANGLE()d constants. Results: mm3 = B, mm0 = R, mm1 = G, packed to bytes
 * with unsigned saturation; mm7 = 0. The loop is left open (no index update
 * or jump back to "1:" inside the macro).
 */
370 #if 0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
371 #define FULL_YSCALEYUV2RGB \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
372 "pxor %%mm7, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
373 "movd %6, %%mm6 \n\t" /*yalpha1*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
374 "punpcklwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
375 "punpcklwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
376 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
377 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
378 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
379 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
380 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
381 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
382 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
383 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
384 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
385 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
386 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
387 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
388 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
389 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
390 "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
391 "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
392 "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
393 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
394 "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
395 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
396 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
397 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
398 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
399 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
400 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
401 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
402 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
403 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
404 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
405 "psraw $4, %%mm0 \n\t" /* uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
406 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
407 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
408 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
409 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
410 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
411 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
412 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
413 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
414 "paddw %%mm1, %%mm3 \n\t" /* B*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
415 "paddw %%mm1, %%mm0 \n\t" /* R*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
416 "packuswb %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
417 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
418 "packuswb %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
419 "paddw %%mm4, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
420 "paddw %%mm2, %%mm1 \n\t" /* G*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
421 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
422 "packuswb %%mm1, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
423 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
424
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * REAL_YSCALEYUV2PACKED(index, c): asm fragment that blends two source lines
 * for packed output.
 *   index - register text used as the loop index (the inline comments call
 *           it "eax"); it is zeroed before label "1:".
 *   c     - register text used as base for the CHR/LUM_MMX_FILTER_OFFSET
 *           accesses.
 * Operands %0/%1 are the two luma lines (buf0/buf1) and %2/%3 the two chroma
 * lines (uvbuf0/uvbuf1), with V located 4096 bytes after U.
 * Before the loop, the coefficient qwords at CHR_MMX_FILTER_OFFSET+8 and
 * LUM_MMX_FILTER_OFFSET+8 are shifted right by 3 and written back to memory.
 * NOTE(review): that shift is applied in place each time the fragment runs,
 * so the stored coefficients are presumably refreshed before every use -
 * confirm against the caller.
 * At the end of the fragment: mm3 = blended U, mm4 = blended V,
 * mm1 = blended luma words 0-3, mm7 = blended luma words 4-7.
 * The loop is left open (no index update or jump back to "1:" here).
 */
425 #define REAL_YSCALEYUV2PACKED(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
426 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
427 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
428 "psraw $3, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
429 "psraw $3, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
430 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
431 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
432 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
433 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
434 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
435 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
436 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
437 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
438 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
439 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
440 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
441 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
442 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
443 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
444 "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
445 "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
446 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
447 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
448 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
449 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
450 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
451 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
452 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
453 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax+4] - buf1[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
454 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
455 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax+4] - buf1[eax+4])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
456 "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
457 "psraw $7, %%mm7 \n\t" /* buf1[eax+4] >>7*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
458 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
459 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax+4]yalpha1 + buf1[eax+4](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
460
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Forwarding wrapper for REAL_YSCALEYUV2PACKED. The extra macro level lets
 * arguments that are themselves macros be expanded before
 * REAL_YSCALEYUV2PACKED stringizes them with the # operator.
 */
461 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
462
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
463 #define REAL_YSCALEYUV2RGB(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
464 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
465 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
466 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
467 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
468 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
469 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
470 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
471 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
472 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
473 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
474 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
475 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
476 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
477 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
478 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
479 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
480 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
481 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
482 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
483 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
484 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
485 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
486 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
487 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
488 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
489 "movq 8(%0, "#index", 2), %%mm6\n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
490 "movq 8(%1, "#index", 2), %%mm7\n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
491 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
492 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
493 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
494 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
495 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
496 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
497 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
498 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
499 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
500 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
501 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
502 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
503 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
504 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
505 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
506 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
507 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
508 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
509 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
510 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
511 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
512 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
513 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
514 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
515 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
516 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
517 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
518 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
519 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
520 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
521 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
522 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
523 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
524 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
525 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
526 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
527 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) /* extra expansion level: index and c are macro-expanded here before REAL_YSCALEYUV2RGB applies the # operator to them */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
528
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
529 #define REAL_YSCALEYUV2PACKED1(index, c) /* loop head, one source line, no colour conversion: leaves uvbuf0 words >>7 in mm3/mm4 and buf0 words >>7 in mm1/mm7; eax in the comments below means the index argument; c is not used in this body */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
530 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
531 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
532 "1: \n\t" /* loop entry; the branch back to 1 is not part of this macro */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
533 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
534 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
535 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] >>7 (arithmetic, per 16-bit word) */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
536 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
537 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
538 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]: the next four words*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
539 "psraw $7, %%mm1 \n\t" /* buf0[eax] >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
540 "psraw $7, %%mm7 \n\t" /* buf0[eax+4] >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
541
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
542 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) /* extra expansion level: the arguments are macro-expanded here before REAL_YSCALEYUV2PACKED1 applies the # operator to index */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
543
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
544 #define REAL_YSCALEYUV2RGB1(index, c) /* one source line variant: reads 8 words from buf0 (%0) and 4+4 words from uvbuf0 (%2), applies the offsets and coefficients addressed through c, and ends with packed bytes mm2=B, mm4=G, mm5=R and mm7=0; opens loop label 1; eax in the comments below means the index argument */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
545 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
546 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
547 "1: \n\t" /* loop entry; the branch back to 1 is not part of this macro */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
548 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
549 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
550 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4 (no second line is read in this variant) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
551 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
552 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
553 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
554 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
555 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
556 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
557 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
558 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
559 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
560 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]: the next four words*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
561 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
562 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
563 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
564 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
565 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
566 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
567 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
568 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
569 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
570 "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
571 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
572 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
573 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
574 "punpcklwd %%mm2, %%mm2 \n\t" /* low two words of each chroma term doubled: one value per pair of pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
575 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
576 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
577 "paddw %%mm1, %%mm2 \n\t" /* add Y1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
578 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
579 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
580 "punpckhwd %%mm0, %%mm0 \n\t" /* high two words doubled: pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
581 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
582 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
583 "paddw %%mm7, %%mm0 \n\t" /* add Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
584 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
585 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
586 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = pixels 0..3 built from Y1, 2 = pixels 4..7 built from Y2) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
587 "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes, unsigned saturation */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
588 "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
589 "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
590 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
591 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) /* extra expansion level: index and c are macro-expanded here before REAL_YSCALEYUV2RGB1 applies the # operator to them */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
592
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
593 #define REAL_YSCALEYUV2PACKED1b(index, c) /* loop head, no colour conversion: sums the words of uvbuf0 (%2) and uvbuf1 (%3) and shifts the sum right by 8 into mm3/mm4; buf0 (%0) words >>7 go to mm1/mm7; eax in the comments below means the index argument; c is not used in this body */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
594 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
595 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
596 "1: \n\t" /* loop entry; the branch back to 1 is not part of this macro */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
597 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
598 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
599 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
600 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
601 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
602 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
603 "psrlw $8, %%mm3 \n\t" /* (uvbuf0[eax] + uvbuf1[eax]) >>8, logical shift of the 16-bit sum */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
604 "psrlw $8, %%mm4 \n\t" /* (uvbuf0[eax+2048] + uvbuf1[eax+2048]) >>8 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
605 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
606 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]: the next four words*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
607 "psraw $7, %%mm1 \n\t" /* buf0[eax] >>7 */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
608 "psraw $7, %%mm7 \n\t" /* buf0[eax+4] >>7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
609 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) /* extra expansion level: the arguments are macro-expanded here before REAL_YSCALEYUV2PACKED1b applies the # operator to index */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
610
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
611 // do vertical chrominance interpolation
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
612 #define REAL_YSCALEYUV2RGB1b(index, c) /* sums the words of uvbuf0 (%2) and uvbuf1 (%3) and shifts the sum right by 5, reads 8 words from buf0 (%0) only, applies the offsets and coefficients addressed through c, and ends with packed bytes mm2=B, mm4=G, mm5=R and mm7=0; opens loop label 1; eax in the comments below means the index argument */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
613 "xor "#index", "#index" \n\t" /* index = 0 */\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
614 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
615 "1: \n\t" /* loop entry; the branch back to 1 is not part of this macro */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
616 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
617 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
618 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
619 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
620 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
621 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
622 "psrlw $5, %%mm3 \n\t" /* sum >>5, logical shift of the 16-bit sum; FIXME might overflow*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
623 "psrlw $5, %%mm4 \n\t" /* sum >>5; FIXME might overflow*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
624 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
625 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
626 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
627 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
628 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
629 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
630 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
631 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
632 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]: the next four words*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
633 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4 (no buf1 is read in this variant) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
634 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
635 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
636 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
637 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
638 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
639 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
640 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
641 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
642 "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
643 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
644 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
645 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
646 "punpcklwd %%mm2, %%mm2 \n\t" /* low two words of each chroma term doubled: one value per pair of pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
647 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
648 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
649 "paddw %%mm1, %%mm2 \n\t" /* add Y1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
650 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
651 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
652 "punpckhwd %%mm0, %%mm0 \n\t" /* high two words doubled: pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
653 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
654 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
655 "paddw %%mm7, %%mm0 \n\t" /* add Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
656 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
657 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
658 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = pixels 0..3 built from Y1, 2 = pixels 4..7 built from Y2) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
659 "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes, unsigned saturation */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
660 "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
661 "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
662 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
663 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) /* extra expansion level: index and c are macro-expanded here before REAL_YSCALEYUV2RGB1b applies the # operator to them */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
664
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
665 #define REAL_WRITEBGR32(dst, dstw, index) /* loop tail: interleaves 8 B, G and R bytes into four quadwords of 32-bit pixels, passes them to MOVNTQ (defined elsewhere) for byte offsets 0..24 from (dst, index, 4), then adds 8 to index and branches back to label 1 while index is below dstw; in the comments below the letters run from the most to the least significant byte */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
666 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
667 "movq %%mm2, %%mm1 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
668 "movq %%mm5, %%mm6 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
669 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
670 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
671 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
672 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
673 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
674 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
675 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
676 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
677 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
678 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
679 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
680 MOVNTQ(%%mm0, (dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
681 MOVNTQ(%%mm2, 8(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
682 MOVNTQ(%%mm1, 16(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
683 MOVNTQ(%%mm3, 24(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
684 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
685 "add $8, "#index" \n\t" /* 8 pixels handled per iteration */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
686 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
687 " jb 1b \n\t" /* unsigned index < dstw: loop; label 1 is not defined in this macro, presumably it is the 1: emitted by a YSCALEYUV2* macro placed before this one */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
688 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index) /* extra expansion level: the arguments are macro-expanded here before REAL_WRITEBGR32 applies the # operator to dstw and index */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
689
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
690 #define REAL_WRITEBGR16(dst, dstw, index) /* loop tail: takes 8 bytes each of B (mm2), G (mm4) and R (mm5) and uses mm7 as the zero source for the G high bytes, builds 8 16-bit pixels with masked R in the high byte, masked G shifted left by 3 and masked B shifted right by 3 (a 5-6-5 layout assuming bF8 and bFC, defined elsewhere, keep the top 5 and 6 bits of every byte; TODO confirm), passes them to MOVNTQ (defined elsewhere) for (dst, index, 2), then adds 8 to index and loops while index is below dstw */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
691 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
692 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
693 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
694 "psrlq $3, %%mm2 \n\t" /* B >>3; a 64-bit shift, so it relies on the mask having cleared the low 3 bits of every byte */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
695 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
696 "movq %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
697 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
698 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
699 "punpcklbw %%mm7, %%mm3 \n\t" /* words 0G, pixels 0..3 (mm7 supplies the high bytes) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
700 "punpcklbw %%mm5, %%mm2 \n\t" /* words RB, pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
701 "punpckhbw %%mm7, %%mm4 \n\t" /* words 0G, pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
702 "punpckhbw %%mm5, %%mm1 \n\t" /* words RB, pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
703 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
704 "psllq $3, %%mm3 \n\t" /* G <<3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
705 "psllq $3, %%mm4 \n\t" /* G <<3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
706 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
707 "por %%mm3, %%mm2 \n\t" /* finished pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
708 "por %%mm4, %%mm1 \n\t" /* finished pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
709 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
710 MOVNTQ(%%mm2, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
711 MOVNTQ(%%mm1, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
712 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
713 "add $8, "#index" \n\t" /* 8 pixels handled per iteration */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
714 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
715 " jb 1b \n\t" /* unsigned index < dstw: loop; label 1 is not defined in this macro */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
716 #define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index) /* extra expansion level: the arguments are macro-expanded here before REAL_WRITEBGR16 applies the # operator to dstw and index */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
717
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
718 #define REAL_WRITEBGR15(dst, dstw, index) /* loop tail: takes 8 bytes each of B (mm2), G (mm4) and R (mm5) and uses mm7 as the zero source for the G high bytes, builds 8 16-bit pixels with masked R >>1 in the high byte, masked G shifted left by 2 and masked B shifted right by 3 (a 5-5-5 layout with the top bit clear, assuming bF8, defined elsewhere, keeps the top 5 bits of every byte; TODO confirm), passes them to MOVNTQ (defined elsewhere) for (dst, index, 2), then adds 8 to index and loops while index is below dstw */ \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
719 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
720 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
721 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
722 "psrlq $3, %%mm2 \n\t" /* B >>3; a 64-bit shift, so it relies on the mask having cleared the low 3 bits of every byte */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
723 "psrlq $1, %%mm5 \n\t" /* R >>1, same reliance on the mask */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
724 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
725 "movq %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
726 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
727 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
728 "punpcklbw %%mm7, %%mm3 \n\t" /* words 0G, pixels 0..3 (mm7 supplies the high bytes) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
729 "punpcklbw %%mm5, %%mm2 \n\t" /* words RB, pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
730 "punpckhbw %%mm7, %%mm4 \n\t" /* words 0G, pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
731 "punpckhbw %%mm5, %%mm1 \n\t" /* words RB, pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
732 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
733 "psllq $2, %%mm3 \n\t" /* G <<2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
734 "psllq $2, %%mm4 \n\t" /* G <<2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
735 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
736 "por %%mm3, %%mm2 \n\t" /* finished pixels 0..3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
737 "por %%mm4, %%mm1 \n\t" /* finished pixels 4..7 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
738 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
739 MOVNTQ(%%mm2, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
740 MOVNTQ(%%mm1, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
741 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
742 "add $8, "#index" \n\t" /* 8 pixels handled per iteration */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
743 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
744 " jb 1b \n\t" /* unsigned index < dstw: loop; label 1 is not defined in this macro */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
745 #define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index) /* extra expansion level: the arguments are macro-expanded here before REAL_WRITEBGR15 applies the # operator to dstw and index */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
746
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24OLD: inline-asm fragment that packs eight pixels into 24 bytes of
 * 3-byte pixels and stores them at dst.
 *
 * On entry mm2 holds eight B bytes, mm4 eight G bytes, mm5 eight R bytes and
 * mm7 must be zero. The register diagrams in the comments list the most
 * significant byte first, so the lowest memory address receives B, then G,
 * then R. The trailing number in each diagram is the pixel pair (0..3) the
 * register currently carries.
 *
 * Method: build four 0RGB0RGB quadwords, then squeeze out the padding bytes
 * with the bm00000111 / bm11111000 / bm00001111 masks and shifts.
 *
 * Side effects: all of mm0-mm6 are overwritten; dst (used as a register name
 * via '#') is advanced by 24 bytes and index by 8 pixels. The fragment ends
 * with "jb 1b", looping back while index < dstw to a local label "1" that is
 * not defined here - presumably emitted by the scaling macro placed in front
 * of it in the same asm statement.
 *
 * The WRITEBGR24 selection further down in this file picks WRITEBGR24MMX or
 * WRITEBGR24MMX2, not this macro.
 */
#define WRITEBGR24OLD(dst, dstw, index) \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq %%mm2, %%mm1 \n\t" /* B */\
"movq %%mm5, %%mm6 \n\t" /* R */\
"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
"psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
"pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
"por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
"movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
"psllq $48, %%mm2 \n\t" /* GB000000 1 */\
"por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
\
"movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
"psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
"psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
"por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
"pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
"movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
"psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
"pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
"por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
"movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
"psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
"por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
\
"psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
"movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
"psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
"pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
"pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
"por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
"psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
"por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
\
MOVNTQ(%%mm0, (dst))\
MOVNTQ(%%mm2, 8(dst))\
MOVNTQ(%%mm3, 16(dst))\
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
"cmp "#dstw", "#index" \n\t"\
" jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
802
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24MMX: plain-MMX inline-asm fragment that packs eight pixels into
 * 24 bytes of 3-byte pixels and stores them at dst.
 *
 * On entry mm2 holds eight B bytes, mm4 eight G bytes, mm5 eight R bytes and
 * mm7 must be zero. Register diagrams in the comments list the most
 * significant byte first (B lands at the lowest address); the trailing number
 * is the pixel pair (0..3) the register carries.
 *
 * Method: build four 0RGB0RGB quadwords, turn each into 0RGBRGB0 with a
 * shift-by-40 plus punpckhdq (no mask constants needed), then splice the four
 * 6-byte groups into three quadwords with shifts and ORs. Each output
 * quadword is stored as soon as it is complete.
 *
 * Side effects: mm0-mm7 are all overwritten (mm7 is reused as scratch, so it
 * is no longer zero afterwards); dst (used as a register name via '#') is
 * advanced by 24 bytes and index by 8 pixels. The fragment ends with
 * "jb 1b", looping back while index < dstw to a local label "1" that is not
 * defined here - presumably emitted by the scaling macro placed in front of
 * it in the same asm statement.
 */
#define WRITEBGR24MMX(dst, dstw, index) \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq %%mm2, %%mm1 \n\t" /* B */\
"movq %%mm5, %%mm6 \n\t" /* R */\
"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
"movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
"movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
"movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
\
"psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
"psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
"psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
"psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
\
"punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
"punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
"punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
"punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
\
"psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
"movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
"psllq $40, %%mm2 \n\t" /* GB000000 1 */\
"por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
MOVNTQ(%%mm0, (dst))\
\
"psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
"movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
"psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
"por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
MOVNTQ(%%mm6, 8(dst))\
\
"psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
"psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
"por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
MOVNTQ(%%mm5, 16(dst))\
\
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
"cmp "#dstw", "#index" \n\t"\
" jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
855
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24MMX2: inline-asm fragment that packs eight pixels into 24 bytes
 * of 3-byte pixels and stores them at dst, using pshufw (hence only selected
 * when HAVE_MMX2 is defined).
 *
 * On entry mm2 holds eight B bytes, mm4 eight G bytes and mm5 eight R bytes.
 * Unlike the other WRITEBGR24 variants, mm7 does not have to be zero: it is
 * loaded with the M24C mask by the second instruction.
 *
 * Method: for each of the three output quadwords, pshufw replicates the
 * 16-bit words that contain the needed bytes of each channel, the
 * M24A / M24B / M24C masks keep exactly the byte positions that channel owns
 * in that quadword, and the three channels are ORed together. G is shifted
 * by one byte (psllq / psrlq $8) so its bytes line up with the slots the
 * masks select. Register diagrams in the comments list the most significant
 * byte first.
 *
 * Side effects: mm0, mm1, mm3, mm4, mm6 and mm7 are overwritten (mm2 and mm5
 * are only read); dst (used as a register name via '#') is advanced by
 * 24 bytes and index by 8 pixels. The fragment ends with "jb 1b", looping
 * back while index < dstw to a local label "1" that is not defined here -
 * presumably emitted by the scaling macro placed in front of it in the same
 * asm statement.
 */
#define WRITEBGR24MMX2(dst, dstw, index) \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq "MANGLE(M24A)", %%mm0 \n\t"\
"movq "MANGLE(M24C)", %%mm7 \n\t"\
"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
"pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
\
"pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
"pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
"pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
\
"psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
"por %%mm1, %%mm6 \n\t"\
"por %%mm3, %%mm6 \n\t"\
MOVNTQ(%%mm6, (dst))\
\
"psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
"pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
"pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
"pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
\
"pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
"pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
"pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
\
"por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
"por %%mm3, %%mm6 \n\t"\
MOVNTQ(%%mm6, 8(dst))\
\
"pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
"pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
"pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
\
"pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
"pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
"pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
\
"por %%mm1, %%mm3 \n\t"\
"por %%mm3, %%mm6 \n\t"\
MOVNTQ(%%mm6, 16(dst))\
\
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
"cmp "#dstw", "#index" \n\t"\
" jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
903
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Bind WRITEBGR24 to the 24-bit pixel writer this build should use: the
 * pshufw-based WRITEBGR24MMX2 when HAVE_MMX2 is defined, the plain-MMX
 * WRITEBGR24MMX otherwise. Any earlier definition is discarded first
 * (presumably because this template is compiled more than once). */
#undef WRITEBGR24
#if defined(HAVE_MMX2)
#  define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
#else
#  define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
#endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
911
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * REAL_WRITEYUY2: inline-asm fragment that packs eight pixels into 16 bytes
 * of interleaved 4:2:2 data and stores them at dst + 2*index.
 *
 * On entry mm1 and mm7 each hold four 16-bit samples (pixels 0-3 and 4-7),
 * and mm3 / mm4 each hold four 16-bit samples of the two subsampled
 * channels - presumably Y in mm1/mm7, U in mm3 and V in mm4, which gives the
 * Y U Y V byte order the macro name suggests. All samples are saturated to
 * unsigned bytes by packuswb.
 *
 * Side effects: mm1, mm3, mm4 and mm7 are overwritten; index is advanced by
 * 8 pixels. The fragment ends with "jb 1b", looping back while index < dstw
 * to a local label "1" that is not defined here - presumably emitted by the
 * scaling macro placed in front of it in the same asm statement.
 *
 * The byte diagrams added below are in memory order (lowest address first),
 * writing a/b for the bytes that came from mm3/mm4.
 */
#define REAL_WRITEYUY2(dst, dstw, index) \
"packuswb %%mm3, %%mm3 \n\t" /* a0 a1 a2 a3 (duplicated in the high half) */\
"packuswb %%mm4, %%mm4 \n\t" /* b0 b1 b2 b3 (duplicated in the high half) */\
"packuswb %%mm7, %%mm1 \n\t" /* Y0 Y1 Y2 Y3 Y4 Y5 Y6 Y7 */\
"punpcklbw %%mm4, %%mm3 \n\t" /* a0 b0 a1 b1 a2 b2 a3 b3 */\
"movq %%mm1, %%mm7 \n\t"\
"punpcklbw %%mm3, %%mm1 \n\t" /* Y0 a0 Y1 b0 Y2 a1 Y3 b1 */\
"punpckhbw %%mm3, %%mm7 \n\t" /* Y4 a2 Y5 b2 Y6 a3 Y7 b3 */\
\
MOVNTQ(%%mm1, (dst, index, 2))\
MOVNTQ(%%mm7, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
"cmp "#dstw", "#index" \n\t"\
" jb 1b \n\t"
/* Indirection layer so the arguments are macro-expanded before
 * REAL_WRITEYUY2 stringifies dstw and index with '#'. */
#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
928
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
929
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
930 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
931 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
932 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
933 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
934 #ifdef HAVE_MMX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
935 if(c->flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
936 if(uDest){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
937 YSCALEYUV2YV12X_ACCURATE( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
938 YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
939 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
940
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
941 YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
942 }else{
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
943 if(uDest){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
944 YSCALEYUV2YV12X( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
945 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
946 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
947
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
948 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
949 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
950 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
951 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
952 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
953 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
954 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
955 #else //HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
956 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
957 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
958 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
959 #endif //!HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
960 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
961 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
962
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
963 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
964 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
965 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
966 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
967 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
968 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
969 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
970 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
971
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
972 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
973 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
974 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
975 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
976 if(uDest != NULL)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
977 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
978 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
979 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
980 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
981 "g" (-chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
982 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
983 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
984
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
985 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
986 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
987 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
988 "g" (-chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
989 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
990 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
991 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
992
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
993 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
994 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
995 :: "r" (lumSrc + dstW), "r" (dest + dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
996 "g" (-dstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
997 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
998 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
999 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1000 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1001 for(i=0; i<dstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1002 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1003 int val= lumSrc[i]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1004
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1005 if(val&256){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1006 if(val<0) val=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1007 else val=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1008 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1009
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1010 dest[i]= val;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1011 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1012
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1013 if(uDest != NULL)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1014 for(i=0; i<chrDstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1015 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1016 int u=chrSrc[i]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1017 int v=chrSrc[i + 2048]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1018
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1019 if((u|v)&256){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1020 if(u<0) u=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1021 else if (u>255) u=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1022 if(v<0) v=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1023 else if (v>255) v=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1024 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1025
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1026 uDest[i]= u;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1027 vDest[i]= v;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1028 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1029 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1030 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1031
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1032
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1033 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1034 * vertical scale YV12 to packed output (RGB32, BGR24, BGR555, BGR565 or YUYV422
 * in the MMX paths; everything else goes to the AltiVec or plain C code)
 *
 * With HAVE_MMX an inline-asm path is selected by c->dstFormat; the
 * YSCALEYUV2PACKEDX_ACCURATE flavour is used when c->flags has
 * SWS_ACCURATE_RND set, YSCALEYUV2PACKEDX otherwise. Every MMX case returns
 * directly. A dstFormat without a case leaves the switch and reaches
 * altivec_yuv2packedX() (HAVE_ALTIVEC and one of the listed formats) or
 * yuv2packedXinC().
 *
 * @param c             context; flags, dstFormat and the address of redDither are read here
 * @param lumFilter     luma filter; in the code visible here only forwarded to the fallbacks
 * @param lumSrc        luma source lines; only forwarded to the fallbacks
 * @param lumFilterSize luma filter size; only forwarded to the fallbacks
 * @param chrFilter     chroma filter; only forwarded to the fallbacks
 * @param chrSrc        chroma source lines; only forwarded to the fallbacks
 * @param chrFilterSize chroma filter size; only forwarded to the fallbacks
 * @param dest          destination buffer (asm operand %4 in the MMX paths)
 * @param dstW          destination width (asm operand %5 in the MMX paths)
 * @param dstY          only forwarded to the fallbacks
 *
 * NOTE(review): the MMX macros are defined elsewhere; presumably they fetch
 * the filter data through the pointer passed as operand %0 -- confirm.
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1035 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1036 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1037 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1038 uint8_t *dest, long dstW, long dstY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1039 {
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1040 #ifdef HAVE_MMX
20015
d08ba4508bb0 Fix unused variable warning when compiling with MMX disabled.
diego
parents: 19872
diff changeset
/* placeholder for the three unused "m" operands (%1-%3) in the explicit BGR24 operand lists below */
1041 long dummy=0;
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/* accurate-rounding variants of the MMX paths */
1042 if(c->flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1043 switch(c->dstFormat){
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1044 case PIX_FMT_RGB32:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1045 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1046 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1047 WRITEBGR32(%4, %5, %%REGa)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1048
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1049 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1050 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1051 case PIX_FMT_BGR24:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1052 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1053 YSCALEYUV2RGBX
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
/* REG_c = dest + 3*REG_a: lea yields REG_a + 2*REG_a, then dest (%4) is added;
   presumably the byte address of the current 3-byte pixel -- confirm against WRITEBGR24 */
1054 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1055 "add %4, %%"REG_c" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1056 WRITEBGR24(%%REGc, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1057
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1058
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/* explicit operand list (no YSCALEYUV2PACKEDX_END in this case):
   %0 = &c->redDither, %1-%3 = dummy, %4 = dest, %5 = dstW; clobbers REG_a, REG_c, REG_d, REG_S */
1059 :: "r" (&c->redDither),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1060 "m" (dummy), "m" (dummy), "m" (dummy),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1061 "r" (dest), "m" (dstW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1062 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1063 );
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1064 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1065 case PIX_FMT_BGR555:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1066 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1067 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1068 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/* optional dithering: saturating byte add of the 5-bit dither constants to B, G and R */
1069 #ifdef DITHER1XBPP
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1070 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1071 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1072 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1073 #endif
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1074
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1075 WRITEBGR15(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1076 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1077 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1078 case PIX_FMT_BGR565:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1079 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1080 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1081 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/* same as the BGR555 case except that green uses the 6-bit dither constant */
1082 #ifdef DITHER1XBPP
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1083 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1084 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1085 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1086 #endif
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1087
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1088 WRITEBGR16(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1089 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1090 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1091 case PIX_FMT_YUYV422:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1092 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1093 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* NOTE(review): YSCALEYUV2RGBX is not used in this case, so the B/G/R note above looks
   copied from the RGB cases -- confirm what the scaling macro leaves in mm1/mm3/mm4/mm7 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1094
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/* arithmetic right shift by 3 of the four word registers handed to WRITEYUY2 */
1095 "psraw $3, %%mm3 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1096 "psraw $3, %%mm4 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1097 "psraw $3, %%mm1 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1098 "psraw $3, %%mm7 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1099 WRITEYUY2(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1100 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1101 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/* no default: any other dstFormat leaves the switch and uses the generic code after the #endif */
1102 }
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
/* same set of formats without accurate rounding (YSCALEYUV2PACKEDX instead of the _ACCURATE macro) */
1103 }else{
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1104 switch(c->dstFormat)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1105 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1106 case PIX_FMT_RGB32:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1107 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1108 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1109 WRITEBGR32(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1110 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1111 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1112 case PIX_FMT_BGR24:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1113 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1114 YSCALEYUV2RGBX
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
/* REG_c = dest + 3*REG_a, as in the accurate-rounding BGR24 case */
1115 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1116 "add %4, %%"REG_c" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1117 WRITEBGR24(%%REGc, %5, %%REGa)
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1118
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/* explicit operand list: %0 = &c->redDither, %1-%3 = dummy, %4 = dest, %5 = dstW;
   clobbers REG_a, REG_c, REG_d, REG_S */
1119 :: "r" (&c->redDither),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1120 "m" (dummy), "m" (dummy), "m" (dummy),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1121 "r" (dest), "m" (dstW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1122 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1123 );
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1124 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1125 case PIX_FMT_BGR555:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1126 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1127 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1128 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1129 #ifdef DITHER1XBPP
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1130 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1131 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1132 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1133 #endif
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1134
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1135 WRITEBGR15(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1136 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1137 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1138 case PIX_FMT_BGR565:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1139 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1140 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1141 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1142 #ifdef DITHER1XBPP
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1143 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1144 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1145 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1146 #endif
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1147
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1148 WRITEBGR16(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1149 YSCALEYUV2PACKEDX_END
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1150 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1151 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1152 YSCALEYUV2PACKEDX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1153 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* NOTE(review): YSCALEYUV2RGBX is not used in this case, so the B/G/R note above looks
   copied from the RGB cases -- confirm what the scaling macro leaves in mm1/mm3/mm4/mm7 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1154
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1155 "psraw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1156 "psraw $3, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1157 "psraw $3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1158 "psraw $3, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1159 WRITEYUY2(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1160 YSCALEYUV2PACKEDX_END
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1161 return;
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/* no default: any other dstFormat leaves the switch and uses the generic code after the #endif */
1162 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1163 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1164 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1165 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1166 /* The following list of supported dstFormat values should
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1167 match what's found in the body of altivec_yuv2packedX() */
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1168 if(c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1169 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1170 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1171 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1172 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1173 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1174 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1175 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* generic C implementation; with HAVE_ALTIVEC this call is the else-branch of the format check above */
1176 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1177 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1178 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1179 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1180
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1181 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1182 * vertical bilinear scale YV12 to RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1183 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1184 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1185 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1186 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1187 int yalpha1=yalpha^4095;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1188 int uvalpha1=uvalpha^4095;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1189 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1190
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1191 #if 0 //isn't used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1192 if(flags&SWS_FULL_CHR_H_INT)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1193 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1194 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1195 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1196 #ifdef HAVE_MMX
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1197 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1198 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1199
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1200
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1201 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1202 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1203 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1204
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1205 "movq %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1206 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1207 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1208
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1209 MOVNTQ(%%mm3, (%4, %%REGa, 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1210 MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1211
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1212 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1213 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1214 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1215
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1216
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1217 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1218 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1219 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1220 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1221 break;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1222 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1223 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1224
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1225 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1226
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1227 // lsb ... msb
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1228 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1229 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1230
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1231 "movq %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1232 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1233 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1234
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1235 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1236 "psrlq $8, %%mm3 \n\t" // GR0BGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1237 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1238 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1239 "por %%mm2, %%mm3 \n\t" // BGRBGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1240 "movq %%mm1, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1241 "psllq $48, %%mm1 \n\t" // 000000BG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1242 "por %%mm1, %%mm3 \n\t" // BGRBGRBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1243
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1244 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1245 "psrld $16, %%mm2 \n\t" // R000R000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1246 "psrlq $24, %%mm1 \n\t" // 0BGR0000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1247 "por %%mm2, %%mm1 \n\t" // RBGRR000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1248
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1249 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1250 "add %%"REG_a", %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1251
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1252 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1253 //FIXME Alignment
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1254 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1255 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1256 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1257 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1258 "psrlq $32, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1259 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1260 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1261 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1262 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1263 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1264 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1265
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1266 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1267 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1268 : "%"REG_a, "%"REG_b
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1269 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1270 break;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1271 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1272 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1273
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1274 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1275 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1276 "paddusb "MANGLE(g5Dither)", %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1277 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1278 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1279 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1280 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1281 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1282 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1283
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1284 "psrlw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1285 "psllw $2, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1286 "psllw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1287 "pand "MANGLE(g15Mask)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1288 "pand "MANGLE(r15Mask)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1289
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1290 "por %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1291 "por %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1292
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1293 MOVNTQ(%%mm0, (%4, %%REGa, 2))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1294
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1295 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1296 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1297 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1298
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1299 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1300 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1301 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1302 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1303 break;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1304 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1305 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1306
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1307 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1308 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1309 "paddusb "MANGLE(g6Dither)", %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1310 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1311 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1312 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1313 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1314 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1315 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1316
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1317 "psrlw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1318 "psllw $3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1319 "psllw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1320 "pand "MANGLE(g16Mask)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1321 "pand "MANGLE(r16Mask)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1322
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1323 "por %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1324 "por %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1325
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1326 MOVNTQ(%%mm0, (%4, %%REGa, 2))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1327
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1328 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1329 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1330 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1331
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1332 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1333 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1334 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1335 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1336 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1337 #endif
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1338 case PIX_FMT_BGR32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1339 #ifndef HAVE_MMX
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1340 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1341 #endif
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1342 if(dstFormat==PIX_FMT_RGB32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1343 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1344 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1345 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1346 dest++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1347 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1348 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1349 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1350 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1351 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1352 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1353 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1354 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1355 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1356 dest+= 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1357 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1358 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1359 else if(dstFormat==PIX_FMT_BGR24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1360 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1361 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1362 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1363 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1364 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1365 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1366 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1367 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1368 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1369 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1370 dest+= 3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1371 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1372 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1373 else if(dstFormat==PIX_FMT_BGR565)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1374 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1375 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1376 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1377 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1378 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1379 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1380 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1381
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1382 ((uint16_t*)dest)[i] =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1383 clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1384 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1385 clip_table16r[(Y + yuvtab_3343[V]) >>13];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1386 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1387 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1388 else if(dstFormat==PIX_FMT_BGR555)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1389 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1390 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1391 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1392 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1393 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1394 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1395 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1396
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1397 ((uint16_t*)dest)[i] =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1398 clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1399 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1400 clip_table15r[(Y + yuvtab_3343[V]) >>13];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1401 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1402 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1403 }//FULL_UV_IPOL
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1404 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1405 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1406 #endif // if 0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1407 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1408 switch(c->dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1409 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1410 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1411 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1412 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1413 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1414 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1415 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1416 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1417 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1418 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1419 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1420
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1421 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1422 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1423 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1424 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1425 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1426 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1427 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1428 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1429 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1430 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1431 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1432 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1433 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1434 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1435 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1436 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1437 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1438 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1439 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1440 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1441 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1442 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1443 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1444 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1445 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1446 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1447 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1448 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1449 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1450
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1451 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1452 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1453 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1454
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1455 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1456 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1457 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1458 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1459 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1460 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1461 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1462 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1463 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1464 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1465 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1466 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1467 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1468 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1469 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1470 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1471
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1472 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1473 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1474 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1475 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1476 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1477 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1478 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1479 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1480 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1481 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1482 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1483 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1484 YSCALEYUV2PACKED(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1485 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1486 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1487 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1488 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1489 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1490 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1491 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1492 default: break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1493 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1494 #endif //HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1495 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1496 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1497
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1498 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1499 * YV12 to packed RGB or YUYV422 without scaling or interpolating
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1500 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1501 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1502 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1503 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1504 const int yalpha1=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1505 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1506
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1507 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1508 const int yalpha= 4096; //FIXME ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1509
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1510 if(flags&SWS_FULL_CHR_H_INT)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1511 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1512 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1513 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1514 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1515
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1516 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1517 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1518 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1519 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1520 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1521 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1522 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1523 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1524 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1525 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1526 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1527 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1528 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1529 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1530
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1531 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1532 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1533 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1534 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1535 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1536 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1537 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1538 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1539 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1540 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1541 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1542 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1543 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1544
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1545 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1546 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1547 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1548 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1549 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1550 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1551 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1552 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1553 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1554 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1555 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1556 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1557 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1558 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1559 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1560 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1561 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1562 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1563 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1564
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1565 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1566 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1567 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1568 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1569 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1570 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1571 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1572 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1573 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1574 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1575 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1576 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1577 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1578 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1579 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1580 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1581
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1582 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1583 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1584 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1585
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1586 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1587 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1588 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1589 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1590 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1591 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1592 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1593 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1594 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1595 YSCALEYUV2PACKED1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1596 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1597 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1598 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1599
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1600 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1601 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1602 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1603 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1604 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1605 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1606 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1607 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1608 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1609 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1610 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1611 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1612 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1613 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1614 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1615 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1616 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1617 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1618 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1619
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1620 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1621 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1622 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1623 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1624 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1625 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1626 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1627 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1628 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1629 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1630 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1631 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1632 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1633
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1634 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1635 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1636 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1637 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1638 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1639 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1640 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1641 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1642 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1643 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1644 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1645 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1646 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1647 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1648 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1649 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1650 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1651 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1652 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1653
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1654 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1655 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1656 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1657 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1658 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1659 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1660 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1661 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1662 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1663 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1664 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1665 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1666 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1667 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1668 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1669 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1670
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1671 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1672 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1673 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1674
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1675 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1676 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1677 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1678 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1679 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1680 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1681 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1682 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1683 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1684 YSCALEYUV2PACKED1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1685 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1686 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1687 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1688
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1689 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1690 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1691 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1692 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1693 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1694 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1695 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1696 if( uvalpha < 2048 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1697 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1698 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1699 }else{
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1700 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1701 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1702 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1703
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1704 //FIXME yuy2* can read up to 7 samples too many
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1705
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1706 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1707 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1708 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1709 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1710 "movq "MANGLE(bm01010101)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1711 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1712 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1713 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1714 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1715 "pand %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1716 "pand %%mm2, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1717 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1718 "movq %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1719 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1720 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1721 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1722 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1723 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1724 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1725 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1726 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1727 dst[i]= src[2*i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1728 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1729 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1730
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1731 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1732 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1733 #ifdef HAVE_MMX
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1734 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1735 "movq "MANGLE(bm01010101)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1736 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1737 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1738 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1739 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1740 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1741 "psrlw $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1742 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1743 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1744 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1745 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1746 "packuswb %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1747 "packuswb %%mm1, %%mm1 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1748 "movd %%mm0, (%3, %%"REG_a") \n\t"
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1749 "movd %%mm1, (%2, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1750 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1751 " js 1b \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1752 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1753 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1754 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1755 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1756 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1757 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1758 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1759 dstU[i]= src1[4*i + 1];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1760 dstV[i]= src1[4*i + 3];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1761 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1762 #endif
21686
79bb02931e40 Fix compilation of non-MMX code with gcc 2.95
lucabe
parents: 21325
diff changeset
1763 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1764 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1765
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1766 //this is almost identical to the previous, and exists only because yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1767 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1768 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1769 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1770 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1771 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1772 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1773 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1774 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1775 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1776 "psrlw $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1777 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1778 "movq %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1779 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1780 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1781 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1782 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1783 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1784 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1785 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1786 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1787 dst[i]= src[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1788 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1789 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1790
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1791 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1792 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1793 #ifdef HAVE_MMX
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1794 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1795 "movq "MANGLE(bm01010101)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1796 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1797 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1798 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1799 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1800 "pand %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1801 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1802 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1803 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1804 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1805 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1806 "packuswb %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1807 "packuswb %%mm1, %%mm1 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1808 "movd %%mm0, (%3, %%"REG_a") \n\t"
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1809 "movd %%mm1, (%2, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1810 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1811 " js 1b \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1812 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1813 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1814 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1815 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1816 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1817 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1818 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1819 dstU[i]= src1[4*i + 0];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1820 dstV[i]= src1[4*i + 2];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1821 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1822 #endif
21686
79bb02931e40 Fix compilation of non-MMX code with gcc 2.95
lucabe
parents: 21325
diff changeset
1823 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1824 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1825
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1826 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1827 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1828 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1829 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1830 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1831 int b= ((uint32_t*)src)[i]&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1832 int g= (((uint32_t*)src)[i]>>8)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1833 int r= (((uint32_t*)src)[i]>>16)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1834
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1835 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1836 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1837 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1838
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1839 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1840 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1841 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1842 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1843 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1844 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1845 const int a= ((uint32_t*)src1)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1846 const int e= ((uint32_t*)src1)[2*i+1];
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1847 const int l= (a&0xFF00FF) + (e&0xFF00FF);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1848 const int h= (a&0x00FF00) + (e&0x00FF00);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1849 const int b= l&0x3FF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1850 const int g= h>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1851 const int r= l>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1852
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1853 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1854 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1855 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1856 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1857
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1858 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1859 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1860 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1861 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1862 "mov %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1863 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1864 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1865 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1866 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
1867 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1868 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1869 PREFETCH" 64(%0, %%"REG_d") \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1870 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1871 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1872 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1873 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1874 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1875 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1876 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1877 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1878 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1879 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1880 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1881 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1882 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1883 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1884 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1885 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1886 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1887 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1888 "packssdw %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1889 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1890 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1891 "pmaddwd %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1892 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1893 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1894
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1895 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1896 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1897 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1898 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1899 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1900 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1901 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1902 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1903 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1904 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1905 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1906 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1907 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1908 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1909 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1910 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1911 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1912 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1913 "packssdw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1914 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1915 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1916 "pmaddwd %%mm5, %%mm2 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1917 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1918 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1919 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1920
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1921 "packuswb %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1922 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1923
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1924 "movq %%mm0, (%1, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1925 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1926 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1927 : : "r" (src+width*3), "r" (dst+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1928 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1929 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1930 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1931 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1932 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1933 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1934 int b= src[i*3+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1935 int g= src[i*3+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1936 int r= src[i*3+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1937
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1938 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1939 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1940 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1941 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1942
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1943 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1944 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1945 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1946 asm volatile(
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1947 "mov %3, %%"REG_a" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1948 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1949 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1950 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1951 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1952 "add %%"REG_d", %%"REG_d" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
1953 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1954 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1955 PREFETCH" 64(%0, %%"REG_d") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1956 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1957 "movq (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1958 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1959 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1960 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1961 "psrlq $24, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1962 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1963 PAVGB(%%mm1, %%mm0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1964 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1965 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1966 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1967 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1968 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1969 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1970 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1971 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1972 "paddw %%mm2, %%mm0 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1973 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1974 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1975 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1976 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1977 "paddw %%mm4, %%mm2 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1978 "psrlw $1, %%mm0 \n\t"
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1979 "psrlw $1, %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1980 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1981 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1982 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1983
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1984 "pmaddwd %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1985 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1986 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1987 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1988 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1989 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1990 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1991 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1992 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1993 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1994 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1995 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1996 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1997 "pmaddwd %%mm5, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1998 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1999 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2001 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2002 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2003 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2004 "movq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2005 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2006 "psrlq $24, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2007 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2008 PAVGB(%%mm1, %%mm4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2009 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2010 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2011 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2012 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2013 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2014 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2015 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2016 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2017 "paddw %%mm2, %%mm4 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2018 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2019 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2020 "punpcklbw %%mm7, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2021 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2022 "paddw %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2023 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2024 "psrlw $2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2025 "psrlw $2, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2026 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2027 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2028 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2029
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2030 "pmaddwd %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2031 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2032 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2033 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2034 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2035 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2036 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2037 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2038 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2039 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2040 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2041 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2042 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2043 "pmaddwd %%mm5, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2044 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2045 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2046 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2047
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2048 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2049 "punpckldq %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2050 "punpckhdq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2051 "packsswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2052 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2053
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2054 "movd %%mm0, (%1, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2055 "punpckhdq %%mm0, %%mm0 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2056 "movd %%mm0, (%2, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2057 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2058 " js 1b \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2059 : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2060 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2061 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2062 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2063 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2064 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2065 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2066 int b= src1[6*i + 0] + src1[6*i + 3];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2067 int g= src1[6*i + 1] + src1[6*i + 4];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2068 int r= src1[6*i + 2] + src1[6*i + 5];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2069
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2070 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2071 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2072 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2073 #endif
21686
79bb02931e40 Fix compilation of non-MMX code with gcc 2.95
lucabe
parents: 21325
diff changeset
2074 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2075 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2076
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2077 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2078 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2079 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2080 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2081 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2082 int d= ((uint16_t*)src)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2083 int b= d&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2084 int g= (d>>5)&0x3F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2085 int r= (d>>11)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2086
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2087 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2088 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2089 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2090
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2091 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2092 {
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2093 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2094 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2095 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2096 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2097 int d0= ((uint32_t*)src1)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2098
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2099 int dl= (d0&0x07E0F81F);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2100 int dh= ((d0>>5)&0x07C0F83F);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2101
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2102 int dh2= (dh>>11) + (dh<<21);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2103 int d= dh2 + dl;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2104
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2105 int b= d&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2106 int r= (d>>11)&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2107 int g= d>>21;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2108 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2109 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2110 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2111 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2112
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2113 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2114 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2115 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2116 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2117 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2118 int d= ((uint16_t*)src)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2119 int b= d&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2120 int g= (d>>5)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2121 int r= (d>>10)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2122
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2123 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2124 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2125 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2126
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2127 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2128 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2129 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2130 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2131 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2132 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2133 int d0= ((uint32_t*)src1)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2134
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2135 int dl= (d0&0x03E07C1F);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2136 int dh= ((d0>>5)&0x03E0F81F);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2137
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2138 int dh2= (dh>>11) + (dh<<21);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2139 int d= dh2 + dl;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2140
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2141 int b= d&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2142 int r= (d>>10)&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2143 int g= d>>21;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2144 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2145 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2146 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2147 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2148
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2149
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2150 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2151 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2152 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2153 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2154 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2155 int r= ((uint32_t*)src)[i]&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2156 int g= (((uint32_t*)src)[i]>>8)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2157 int b= (((uint32_t*)src)[i]>>16)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2158
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2159 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2160 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2161 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2162
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2163 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2164 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2165 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2166 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2167 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2168 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2169 const int a= ((uint32_t*)src1)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2170 const int e= ((uint32_t*)src1)[2*i+1];
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2171 const int l= (a&0xFF00FF) + (e&0xFF00FF);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2172 const int h= (a&0x00FF00) + (e&0x00FF00);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2173 const int r= l&0x3FF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2174 const int g= h>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2175 const int b= l>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2176
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2177 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2178 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2179 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2180 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2181
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2182 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2183 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2184 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2185 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2186 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2187 int r= src[i*3+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2188 int g= src[i*3+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2189 int b= src[i*3+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2190
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2191 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2192 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2193 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2194
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2195 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2196 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2197 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2198 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2199 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2200 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2201 int r= src1[6*i + 0] + src1[6*i + 3];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2202 int g= src1[6*i + 1] + src1[6*i + 4];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2203 int b= src1[6*i + 2] + src1[6*i + 5];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2204
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2205 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2206 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2207 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2208 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2209
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2210 static inline void RENAME(rgb16ToY)(uint8_t *dst, uint8_t *src, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2211 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2212 int i;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2213 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2214 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2215 int d= ((uint16_t*)src)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2216 int r= d&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2217 int g= (d>>5)&0x3F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2218 int b= (d>>11)&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2219
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2220 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2221 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2222 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2223
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2224 static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2225 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2226 int i;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2227 assert(src1 == src2);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2228 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2229 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2230 int d0= ((uint32_t*)src1)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2231
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2232 int dl= (d0&0x07E0F81F);
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2233 int dh= ((d0>>5)&0x07C0F83F);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2234
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2235 int dh2= (dh>>11) + (dh<<21);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2236 int d= dh2 + dl;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2237
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2238 int r= d&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2239 int b= (d>>11)&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2240 int g= d>>21;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2241 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2242 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2243 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2244 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2245
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2246 static inline void RENAME(rgb15ToY)(uint8_t *dst, uint8_t *src, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2247 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2248 int i;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2249 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2250 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2251 int d= ((uint16_t*)src)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2252 int r= d&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2253 int g= (d>>5)&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2254 int b= (d>>10)&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2255
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2256 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2257 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2258 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2259
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2260 static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2261 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2262 int i;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2263 assert(src1 == src2);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2264 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2265 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2266 int d0= ((uint32_t*)src1)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2267
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2268 int dl= (d0&0x03E07C1F);
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2269 int dh= ((d0>>5)&0x03E0F81F);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2270
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2271 int dh2= (dh>>11) + (dh<<21);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2272 int d= dh2 + dl;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2273
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2274 int g= d&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2275 int r= (d>>10)&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2276 int b= d>>21;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2277 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2278 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2279 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2280 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2281
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2282 static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, int width, uint32_t *pal)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2283 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2284 int i;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2285 for(i=0; i<width; i++)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2286 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2287 int d= src[i];
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2288 int b= pal[d] &0xFF;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2289 int g=(pal[d]>>8 )&0xFF;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2290 int r= pal[d]>>16;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2291
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2292 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2293 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2294 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2295
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2296 static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width, uint32_t *pal)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2297 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2298 int i;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2299 assert(src1 == src2);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2300 for(i=0; i<width; i++)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2301 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2302 int d0= src1[2*i ];
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2303 int d1= src1[2*i+1];
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2304 int p = (pal[d0]&0xFF00FF) + (pal[d1]&0xFF00FF);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2305 int g = (pal[d0]+pal[d1]-p)>>8;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2306 int b= p&0x1FF;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2307 int r= p>>16;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2308
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2309 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2310 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2311 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2312 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2313
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2314 // Bilinear / Bicubic scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2315 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2316 int16_t *filter, int16_t *filterPos, long filterSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2317 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2318 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2319 assert(filterSize % 4 == 0 && filterSize>0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2320 if(filterSize==4) // allways true for upscaling, sometimes for down too
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2321 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2322 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2323 filter-= counter*2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2324 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2325 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2326 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2327 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2328 "push %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2329 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2330 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2331 "movq "MANGLE(w02)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2332 "push %%"REG_BP" \n\t" // we use 7 regs here ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2333 "mov %%"REG_a", %%"REG_BP" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2334 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2335 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2336 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2337 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2338 "movq (%1, %%"REG_BP", 4), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2339 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2340 "movd (%3, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2341 "movd (%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2342 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2343 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2344 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2345 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2346 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2347 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2348 "packssdw %%mm3, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2349 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2350 "packssdw %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2351 "movd %%mm0, (%4, %%"REG_BP") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2352 "add $4, %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2353 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2354
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2355 "pop %%"REG_BP" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2356 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2357 "pop %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2358 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2359 : "+a" (counter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2360 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2361 #if !defined(PIC)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2362 : "%"REG_b
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2363 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2364 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2365 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2366 else if(filterSize==8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2367 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2368 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2369 filter-= counter*4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2370 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2371 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2372 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2373 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2374 "push %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2375 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2376 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2377 "movq "MANGLE(w02)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2378 "push %%"REG_BP" \n\t" // we use 7 regs here ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2379 "mov %%"REG_a", %%"REG_BP" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2380 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2381 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2382 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2383 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2384 "movq (%1, %%"REG_BP", 8), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2385 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2386 "movd (%3, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2387 "movd (%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2388 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2389 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2390 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2391 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2392
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2393 "movq 8(%1, %%"REG_BP", 8), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2394 "movq 24(%1, %%"REG_BP", 8), %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2395 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2396 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2397 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2398 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2399 "pmaddwd %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2400 "pmaddwd %%mm2, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2401 "paddd %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2402 "paddd %%mm5, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2403
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2404 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2405 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2406 "packssdw %%mm3, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2407 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2408 "packssdw %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2409 "movd %%mm0, (%4, %%"REG_BP") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2410 "add $4, %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2411 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2412
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2413 "pop %%"REG_BP" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2414 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2415 "pop %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2416 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2417 : "+a" (counter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2418 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2419 #if !defined(PIC)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2420 : "%"REG_b
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2421 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2422 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2423 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2424 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2425 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2426 uint8_t *offset = src+filterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2427 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2428 // filter-= counter*filterSize/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2429 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2430 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2431 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2432 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2433 "movq "MANGLE(w02)", %%mm6 \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2434 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2435 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2436 "mov %2, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2437 "movzwl (%%"REG_c", %0), %%eax \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2438 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2439 "mov %5, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2440 "pxor %%mm4, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2441 "pxor %%mm5, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2442 "2: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2443 "movq (%1), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2444 "movq (%1, %6), %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2445 "movd (%%"REG_c", %%"REG_a"), %%mm0\n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2446 "movd (%%"REG_c", %%"REG_d"), %%mm2\n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2447 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2448 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2449 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2450 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2451 "paddd %%mm3, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2452 "paddd %%mm0, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2453 "add $8, %1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2454 "add $4, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2455 "cmp %4, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2456 " jb 2b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2457 "add %6, %1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2458 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2459 "psrad $8, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2460 "packssdw %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2461 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2462 "packssdw %%mm4, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2463 "mov %3, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2464 "movd %%mm4, (%%"REG_a", %0) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2465 "add $4, %0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2466 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2467
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2468 : "+r" (counter), "+r" (filter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2469 : "m" (filterPos), "m" (dst), "m"(offset),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2470 "m" (src), "r" (filterSize*2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2471 : "%"REG_a, "%"REG_c, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2472 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2473 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2474 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2475 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2476 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2477 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2478 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2479 for(i=0; i<dstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2480 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2481 int j;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2482 int srcPos= filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2483 int val=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2484 // printf("filterPos: %d\n", filterPos[i]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2485 for(j=0; j<filterSize; j++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2486 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2487 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2488 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2489 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2490 // filter += hFilterSize;
21760
18da248c2020 change all the occurrences of "FFMIN(FFMAX())" to clip_uint8() or clip()
lucabe
parents: 21756
diff changeset
2491 dst[i] = clip(val>>7, 0, (1<<15)-1); // the cubic equation does overflow ...
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2492 // dst[i] = val>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2493 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2494 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2495 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2496 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
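2497 // *** horizontally scale one Y (luma) line to the temp buffer; packed YUV, 16-bit gray, RGB/BGR and paletted input is first converted to 8-bit Y in formatConvBuffer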
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2498 static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2499 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2500 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2501 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2502 int32_t *mmx2FilterPos, uint8_t *pal)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2503 {
20411
208c6a5b8665 16-bit grayscale support
kostya
parents: 20094
diff changeset
2504 if(srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2505 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2506 RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2507 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2508 }
20411
208c6a5b8665 16-bit grayscale support
kostya
parents: 20094
diff changeset
2509 else if(srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2510 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2511 RENAME(uyvyToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2512 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2513 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2514 else if(srcFormat==PIX_FMT_RGB32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2515 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2516 RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2517 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2518 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2519 else if(srcFormat==PIX_FMT_BGR24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2520 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2521 RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2522 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2523 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2524 else if(srcFormat==PIX_FMT_BGR565)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2525 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2526 RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2527 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2528 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2529 else if(srcFormat==PIX_FMT_BGR555)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2530 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2531 RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2532 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2533 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2534 else if(srcFormat==PIX_FMT_BGR32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2535 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2536 RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2537 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2538 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2539 else if(srcFormat==PIX_FMT_RGB24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2540 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2541 RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2542 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2543 }
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2544 else if(srcFormat==PIX_FMT_RGB565)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2545 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2546 RENAME(rgb16ToY)(formatConvBuffer, src, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2547 src= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2548 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2549 else if(srcFormat==PIX_FMT_RGB555)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2550 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2551 RENAME(rgb15ToY)(formatConvBuffer, src, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2552 src= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2553 }
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2554 else if(srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2555 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2556 RENAME(palToY)(formatConvBuffer, src, srcW, pal);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2557 src= formatConvBuffer;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2558 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2559
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2560 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2561 // use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 asm one)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2562 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2563 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2564 if(!(flags&SWS_FAST_BILINEAR))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2565 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2566 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2567 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2568 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2569 else // Fast Bilinear upscale / crap downscale
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2570 {
20576
9e7c80f126d6 Use common define for x86_32 and x86_64.
diego
parents: 20411
diff changeset
2571 #if defined(ARCH_X86)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2572 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2573 int i;
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2574 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2575 uint64_t ebxsave __attribute__((aligned(8)));
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2576 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2577 if(canMMX2BeUsed)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2578 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2579 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2580 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2581 "mov %%"REG_b", %5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2582 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2583 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2584 "mov %0, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2585 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2586 "mov %2, %%"REG_d" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2587 "mov %3, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2588 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2589 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2590 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2591 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2592
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2593 #ifdef ARCH_X86_64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2594
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2595 #define FUNNY_Y_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2596 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2597 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2598 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2599 "add %%"REG_S", %%"REG_c" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2600 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2601 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2602
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2603 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2604
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2605 #define FUNNY_Y_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2606 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2607 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2608 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2609 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2610 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2611
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2612 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2613
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2614 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2615 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2616 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2617 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2618 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2619 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2620 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2621 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2622
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2623 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2624 "mov %5, %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2625 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2626 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2627 "m" (funnyYCode)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2628 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2629 ,"m" (ebxsave)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2630 #endif
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2631 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2632 #if !defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2633 ,"%"REG_b
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2634 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2635 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2636 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2637 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2638 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2639 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2640 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2641 long xInc_shr16 = xInc >> 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2642 uint16_t xInc_mask = xInc & 0xffff;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2643 //NO MMX just normal asm ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2644 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2645 "xor %%"REG_a", %%"REG_a" \n\t" // i
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2646 "xor %%"REG_d", %%"REG_d" \n\t" // xx
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2647 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2648 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2649 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2650 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2651 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2652 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2653 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2654 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2655 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2656 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2657 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2658 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2659 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2660 "adc %3, %%"REG_d" \n\t" //xx+= (xInc>>16) + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2661
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2662 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2663 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2664 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2665 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2666 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2667 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2668 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2669 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2670 "movw %%si, 2(%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2671 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2672 "adc %3, %%"REG_d" \n\t" //xx+= (xInc>>16) + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2673
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2674
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2675 "add $2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2676 "cmp %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2677 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2678
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2679
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2680 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2681 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2682 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2683 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2684 } //if MMX2 can't be used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2685 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2686 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2687 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2688 unsigned int xpos=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2689 for(i=0;i<dstWidth;i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2690 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2691 register unsigned int xx=xpos>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2692 register unsigned int xalpha=(xpos&0xFFFF)>>9;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2693 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2694 xpos+=xInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2695 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2696 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2697 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2698 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2699
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2700 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2701 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2702 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2703 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2704 int32_t *mmx2FilterPos, uint8_t *pal)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2705 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2706 if(srcFormat==PIX_FMT_YUYV422)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2707 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2708 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2709 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2710 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2711 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2712 else if(srcFormat==PIX_FMT_UYVY422)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2713 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2714 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2715 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2716 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2717 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2718 else if(srcFormat==PIX_FMT_RGB32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2719 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2720 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2721 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2722 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2723 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2724 else if(srcFormat==PIX_FMT_BGR24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2725 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2726 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2727 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2728 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2729 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2730 else if(srcFormat==PIX_FMT_BGR565)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2731 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2732 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2733 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2734 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2735 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2736 else if(srcFormat==PIX_FMT_BGR555)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2737 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2738 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2739 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2740 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2741 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2742 else if(srcFormat==PIX_FMT_BGR32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2743 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2744 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2745 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2746 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2747 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2748 else if(srcFormat==PIX_FMT_RGB24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2749 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2750 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2751 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2752 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2753 }
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2754 else if(srcFormat==PIX_FMT_RGB565)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2755 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2756 RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2757 src1= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2758 src2= formatConvBuffer+2048;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2759 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2760 else if(srcFormat==PIX_FMT_RGB555)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2761 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2762 RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2763 src1= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2764 src2= formatConvBuffer+2048;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2765 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2766 else if(isGray(srcFormat))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2767 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2768 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2769 }
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2770 else if(srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2771 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2772 RENAME(palToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW, pal);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2773 src1= formatConvBuffer;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2774 src2= formatConvBuffer+2048;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2775 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2776
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2777 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2778 // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2779 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2780 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2781 if(!(flags&SWS_FAST_BILINEAR))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2782 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2783 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2784 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2785 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2786 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2787 else // Fast Bilinear upscale / crap downscale
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2788 {
20576
9e7c80f126d6 Use common define for x86_32 and x86_64.
diego
parents: 20411
diff changeset
2789 #if defined(ARCH_X86)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2790 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2791 int i;
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2792 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2793 uint64_t ebxsave __attribute__((aligned(8)));
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2794 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2795 if(canMMX2BeUsed)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2796 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2797 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2798 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2799 "mov %%"REG_b", %6 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2800 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2801 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2802 "mov %0, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2803 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2804 "mov %2, %%"REG_d" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2805 "mov %3, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2806 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2807 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2808 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2809 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2810
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2811 #ifdef ARCH_X86_64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2812
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2813 #define FUNNY_UV_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2814 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2815 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2816 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2817 "add %%"REG_S", %%"REG_c" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2818 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2819 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2820
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2821 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2822
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2823 #define FUNNY_UV_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2824 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2825 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2826 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2827 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2828 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2829
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2830 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2831
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2832 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2833 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2834 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2835 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2836 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2837 "mov %5, %%"REG_c" \n\t" // src
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2838 "mov %1, %%"REG_D" \n\t" // buf1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2839 "add $4096, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2840 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2841 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2842 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2843
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2844 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2845 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2846 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2847 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2848
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2849 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2850 "mov %6, %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2851 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2852 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2853 "m" (funnyUVCode), "m" (src2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2854 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2855 ,"m" (ebxsave)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2856 #endif
19400
0310c3310360 Fix compilation with -no-PIC and without -fomit-frame-pointer (used by
uau
parents: 19396
diff changeset
2857 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2858 #if !defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2859 ,"%"REG_b
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2860 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2861 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2862 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2863 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2864 // printf("%d %d %d\n", dstWidth, i, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2865 dst[i] = src1[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2866 dst[i+2048] = src2[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2867 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2868 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2869 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2870 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2871 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2872 long xInc_shr16 = (long) (xInc >> 16);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2873 uint16_t xInc_mask = xInc & 0xffff;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2874 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2875 "xor %%"REG_a", %%"REG_a" \n\t" // i
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2876 "xor %%"REG_d", %%"REG_d" \n\t" // xx
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2877 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2878 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2879 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2880 "mov %0, %%"REG_S" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2881 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2882 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2883 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2884 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2885 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2886 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2887 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2888 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2889 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2890
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2891 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2892 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2893 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2894 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2895 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2896 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2897 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2898 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2899 "movw %%si, 4096(%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2900
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2901 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2902 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2903 "add $1, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2904 "cmp %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2905 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2906
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2907 /* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2908 which is needed to support GCC-4.0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2909 #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2910 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2911 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2912 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2913 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2914 "r" (src2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2915 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2916 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2917 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2918 } //if MMX2 can't be used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2919 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2920 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2921 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2922 unsigned int xpos=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2923 for(i=0;i<dstWidth;i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2924 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2925 register unsigned int xx=xpos>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2926 register unsigned int xalpha=(xpos&0xFFFF)>>9;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2927 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2928 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2929 /* slower
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2930 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2931 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2932 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2933 xpos+=xInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2934 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2935 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2936 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2937 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2938
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2939 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2940 int srcSliceH, uint8_t* dst[], int dstStride[]){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2941
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2942 /* load a few things into local vars to make the code more readable? and faster */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2943 const int srcW= c->srcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2944 const int dstW= c->dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2945 const int dstH= c->dstH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2946 const int chrDstW= c->chrDstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2947 const int chrSrcW= c->chrSrcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2948 const int lumXInc= c->lumXInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2949 const int chrXInc= c->chrXInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2950 const int dstFormat= c->dstFormat;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2951 const int srcFormat= c->srcFormat;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2952 const int flags= c->flags;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2953 const int canMMX2BeUsed= c->canMMX2BeUsed;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2954 int16_t *vLumFilterPos= c->vLumFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2955 int16_t *vChrFilterPos= c->vChrFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2956 int16_t *hLumFilterPos= c->hLumFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2957 int16_t *hChrFilterPos= c->hChrFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2958 int16_t *vLumFilter= c->vLumFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2959 int16_t *vChrFilter= c->vChrFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2960 int16_t *hLumFilter= c->hLumFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2961 int16_t *hChrFilter= c->hChrFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2962 int32_t *lumMmxFilter= c->lumMmxFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2963 int32_t *chrMmxFilter= c->chrMmxFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2964 const int vLumFilterSize= c->vLumFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2965 const int vChrFilterSize= c->vChrFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2966 const int hLumFilterSize= c->hLumFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2967 const int hChrFilterSize= c->hChrFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2968 int16_t **lumPixBuf= c->lumPixBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2969 int16_t **chrPixBuf= c->chrPixBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2970 const int vLumBufSize= c->vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2971 const int vChrBufSize= c->vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2972 uint8_t *funnyYCode= c->funnyYCode;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2973 uint8_t *funnyUVCode= c->funnyUVCode;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2974 uint8_t *formatConvBuffer= c->formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2975 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2976 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2977 int lastDstY;
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2978 uint8_t *pal=NULL;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2979
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2980 /* vars which will change and which we need to store back in the context */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2981 int dstY= c->dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2982 int lumBufIndex= c->lumBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2983 int chrBufIndex= c->chrBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2984 int lastInLumBuf= c->lastInLumBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2985 int lastInChrBuf= c->lastInChrBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2986
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2987 if(isPacked(c->srcFormat)){
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2988 pal= src[1];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2989 src[0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2990 src[1]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2991 src[2]= src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2992 srcStride[0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2993 srcStride[1]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2994 srcStride[2]= srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2995 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2996 srcStride[1]<<= c->vChrDrop;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2997 srcStride[2]<<= c->vChrDrop;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2998
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2999 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3000 // (int)dst[0], (int)dst[1], (int)dst[2]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3001
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3002 #if 0 //self test FIXME move to a vfilter or something
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3003 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3004 static volatile int i=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3005 i++;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
3006 if(srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3007 selfTest(src, srcStride, c->srcW, c->srcH);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3008 i--;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3009 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3010 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3011
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3012 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3013 //dstStride[0],dstStride[1],dstStride[2]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3014
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3015 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3016 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3017 static int firstTime=1; //FIXME move this into the context perhaps
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3018 if(flags & SWS_PRINT_INFO && firstTime)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3019 {
21981
a14ddab18acd Pass a context to av_log(), when possible
lucabe
parents: 21760
diff changeset
3020 av_log(c, AV_LOG_WARNING, "SwScaler: Warning: dstStride is not aligned!\n"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3021 "SwScaler: ->cannot do aligned memory acesses anymore\n");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3022 firstTime=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3023 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3024 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3025
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3026 /* Note the user might start scaling the picture in the middle so this will not get executed
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3027 this is not really intended but currently works, so people might do it */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3028 if(srcSliceY ==0){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3029 lumBufIndex=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3030 chrBufIndex=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3031 dstY=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3032 lastInLumBuf= -1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3033 lastInChrBuf= -1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3034 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3035
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3036 lastDstY= dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3037
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3038 for(;dstY < dstH; dstY++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3039 unsigned char *dest =dst[0]+dstStride[0]*dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3040 const int chrDstY= dstY>>c->chrDstVSubSample;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3041 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3042 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3043
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3044 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3045 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3046 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3047 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3048
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3049 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3050 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3051 //handle holes (FAST_BILINEAR & weird filters)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3052 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3053 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3054 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3055 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3056 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3057
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3058 // Do we have enough lines in this slice to output the dstY line
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3059 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3060 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3061 //Do horizontal scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3062 while(lastInLumBuf < lastLumSrcY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3063 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3064 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3065 lumBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3066 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3067 ASSERT(lumBufIndex < 2*vLumBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3068 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3069 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3070 // printf("%d %d\n", lumBufIndex, vLumBufSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3071 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3072 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3073 funnyYCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3074 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3075 lastInLumBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3076 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3077 while(lastInChrBuf < lastChrSrcY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3078 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3079 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3080 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3081 chrBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3082 ASSERT(chrBufIndex < 2*vChrBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3083 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3084 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3085 //FIXME replace parameters through context struct (some at least)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3086
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3087 if(!(isGray(srcFormat) || isGray(dstFormat)))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3088 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3089 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3090 funnyUVCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3091 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3092 lastInChrBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3093 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3094 //wrap buf index around to stay inside the ring buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3095 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3096 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3097 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3098 else // not enough lines left in this slice -> load the rest in the buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3099 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3100 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3101 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3102 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3103 vChrBufSize, vLumBufSize);*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3104
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3105 //Do horizontal scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3106 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3107 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3108 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3109 lumBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3110 ASSERT(lumBufIndex < 2*vLumBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3111 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3112 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3113 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3114 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3115 funnyYCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3116 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3117 lastInLumBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3118 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3119 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3120 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3121 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3122 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3123 chrBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3124 ASSERT(chrBufIndex < 2*vChrBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3125 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3126 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3127
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3128 if(!(isGray(srcFormat) || isGray(dstFormat)))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3129 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3130 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3131 funnyUVCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3132 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3133 lastInChrBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3134 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3135 //wrap buf index around to stay inside the ring buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3136 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3137 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3138 break; //we can't output a dstY line so let's try with the next slice
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3139 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3140
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3141 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3142 b5Dither= dither8[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3143 g6Dither= dither4[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3144 g5Dither= dither8[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3145 r5Dither= dither8[(dstY+1)&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3146 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3147 if(dstY < dstH-2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3148 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3149 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3150 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3151 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3152 int i;
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3153 if(flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3154 for(i=0; i<vLumFilterSize; i+=2){
21756
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3155 lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ];
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3156 lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3157 lumMmxFilter[2*i+2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3158 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3159 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3160 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3161 for(i=0; i<vChrFilterSize; i+=2){
21756
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3162 chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ];
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3163 chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3164 chrMmxFilter[2*i+2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3165 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3166 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3167 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3168 }else{
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3169 for(i=0; i<vLumFilterSize; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3170 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3171 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3172 lumMmxFilter[4*i+2]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3173 lumMmxFilter[4*i+3]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3174 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3175 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3176 for(i=0; i<vChrFilterSize; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3177 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3178 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3179 chrMmxFilter[4*i+2]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3180 chrMmxFilter[4*i+3]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3181 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3182 }
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3183 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3184 #endif
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
3185 if(dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3186 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3187 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3188 RENAME(yuv2nv12X)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3189 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3190 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3191 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3192 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3193 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3194 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3195 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3196 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3197 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3198 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3199 int16_t *lumBuf = lumPixBuf[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3200 int16_t *chrBuf= chrPixBuf[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3201 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3202 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3203 else //General YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3204 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3205 RENAME(yuv2yuvX)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3206 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3207 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3208 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3209 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3210 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3211 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3212 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3213 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3214 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3215 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3216 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3217 int chrAlpha= vChrFilter[2*dstY+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3218 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3219 dest, dstW, chrAlpha, dstFormat, flags, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3220 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3221 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3222 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3223 int lumAlpha= vLumFilter[2*dstY+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3224 int chrAlpha= vChrFilter[2*dstY+1];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3225 lumMmxFilter[2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3226 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3227 chrMmxFilter[2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3228 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3229 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3230 dest, dstW, lumAlpha, chrAlpha, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3231 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3232 else //General RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3233 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3234 RENAME(yuv2packedX)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3235 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3236 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3237 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3238 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3239 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3240 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3241 else // hmm looks like we can't use MMX here without overwriting this array's tail
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3242 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3243 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3244 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
3245 if(dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3246 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3247 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3248 yuv2nv12XinC(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3249 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3250 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3251 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3252 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3253 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3254 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3255 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3256 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3257 yuv2yuvXinC(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3258 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3259 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3260 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3261 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3262 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3263 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3264 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3265 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3266 yuv2packedXinC(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3267 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3268 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3269 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3270 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3271 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3272 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3273
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3274 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3275 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3276 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3277 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3278 /* store changed local vars back in the context */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3279 c->dstY= dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3280 c->lumBufIndex= lumBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3281 c->chrBufIndex= chrBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3282 c->lastInLumBuf= lastInLumBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3283 c->lastInChrBuf= lastInChrBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3284
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3285 return dstY - lastDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3286 }