annotate libswscale/swscale_template.c @ 22598:f39115ea61bb

Add AmigaOS support, patch by Andrea Palmat, andrea amigasoft net.
author diego
date Thu, 15 Mar 2007 17:06:28 +0000
parents 508e55817748
children 29827d88d2da
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1 /*
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
3 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
4 * This file is part of FFmpeg.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
5 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
6 * FFmpeg is free software; you can redistribute it and/or modify
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
7 * it under the terms of the GNU General Public License as published by
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
8 * the Free Software Foundation; either version 2 of the License, or
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
9 * (at your option) any later version.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
10 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
11 * FFmpeg is distributed in the hope that it will be useful,
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
14 * GNU General Public License for more details.
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
15 *
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
16 * You should have received a copy of the GNU General Public License
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
17 * along with FFmpeg; if not, write to the Free Software
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
19 *
21029
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20946
diff changeset
20 * the C code (not assembly, mmx, ...) of this file can be used
1f2ba24b4e47 Clarify that some of the non-SIMD code is now LGPLed.
lucabe
parents: 20946
diff changeset
21 * under the LGPL license too
20094
aca9e9783f67 Change license headers to say 'FFmpeg' instead of 'this program'.
diego
parents: 20015
diff changeset
22 */
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
23
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
24 #undef REAL_MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
25 #undef MOVNTQ
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
26 #undef PAVGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
27 #undef PREFETCH
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
28 #undef PREFETCHW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
29 #undef EMMS
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
30 #undef SFENCE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
31
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
32 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
33 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
34 #define EMMS "femms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
35 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
36 #define EMMS "emms"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
37 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
38
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
39 #ifdef HAVE_3DNOW
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
40 #define PREFETCH "prefetch"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
41 #define PREFETCHW "prefetchw"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
42 #elif defined ( HAVE_MMX2 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
43 #define PREFETCH "prefetchnta"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
44 #define PREFETCHW "prefetcht0"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
45 #else
20724
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20589
diff changeset
46 #define PREFETCH " # nop"
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20589
diff changeset
47 #define PREFETCHW " # nop"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
48 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
49
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
50 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
51 #define SFENCE "sfence"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
52 #else
20724
b8fe18a742ce Fix MacIntel build: "/nop" is illegal on Apple's older version of GAS
gpoirier
parents: 20589
diff changeset
53 #define SFENCE " # nop"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
54 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
55
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
56 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
57 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
58 #elif defined (HAVE_3DNOW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
59 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
60 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
61
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
62 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
63 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
64 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
65 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
66 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
67 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) /* extra expansion level: REAL_MOVNTQ stringizes its arguments with #, so going through this wrapper lets macro arguments be expanded first (presumably for register-name macros such as REGa passed by the callers below) */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
68
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
69 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
70 #include "swscale_altivec_template.c"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
71 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
72
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
73 #define YSCALEYUV2YV12X(x, offset, dest, width) \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
74 asm volatile(\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
75 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
76 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
77 "movq %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
78 "lea " offset "(%0), %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
79 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
80 ASMALIGN(4) /* FIXME Unroll? */\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
81 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
82 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
83 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
84 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
85 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
86 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
87 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
88 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
89 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
90 "paddw %%mm2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
91 "paddw %%mm5, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
92 " jnz 1b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
93 "psraw $3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
94 "psraw $3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
95 "packuswb %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
96 MOVNTQ(%%mm3, (%1, %%REGa))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
97 "add $8, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
98 "cmp %2, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
99 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
100 "movq %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
101 "lea " offset "(%0), %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
102 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
103 "jb 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
104 :: "r" (&c->redDither),\
21325
963e85e82154 Change "p" asm constraints to "g", since "p" was a no longer necessary hack to
reimar
parents: 21029
diff changeset
105 "r" (dest), "g" (width)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
106 : "%"REG_a, "%"REG_d, "%"REG_S\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
107 );
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
108
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
109 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
110 asm volatile(\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
111 "lea " offset "(%0), %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
112 "xor %%"REG_a", %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
113 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
114 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
115 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
116 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
117 "mov (%%"REG_d"), %%"REG_S" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
118 ASMALIGN(4) \
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
119 "1: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
120 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
121 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
122 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
123 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
124 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
125 "punpcklwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
126 "punpckhwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
127 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
128 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
129 "pmaddwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
130 "paddd %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
131 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
132 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
133 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
134 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
135 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
136 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
137 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
138 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
139 "pmaddwd %%mm1, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
140 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
141 "paddd %%mm2, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
142 "paddd %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
143 " jnz 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
144 "psrad $16, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
145 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
146 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
147 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
148 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
149 "packssdw %%mm5, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
150 "packssdw %%mm7, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
151 "paddw %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
152 "paddw %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
153 "psraw $3, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
154 "psraw $3, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
155 "packuswb %%mm6, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
156 MOVNTQ(%%mm4, (%1, %%REGa))\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
157 "add $8, %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
158 "cmp %2, %%"REG_a" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
159 "lea " offset "(%0), %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
160 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
161 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
162 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
163 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
164 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
165 "jb 1b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
166 :: "r" (&c->redDither),\
21325
963e85e82154 Change "p" asm constraints to "g", since "p" was a no longer necessary hack to
reimar
parents: 21029
diff changeset
167 "r" (dest), "g" (width)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
168 : "%"REG_a, "%"REG_d, "%"REG_S\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
169 );
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
170
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
171 #define YSCALEYUV2YV121 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
172 "mov %2, %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
173 ASMALIGN(4) /* FIXME Unroll? */\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
174 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
175 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
176 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
177 "psraw $7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
178 "psraw $7, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
179 "packuswb %%mm1, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
180 MOVNTQ(%%mm0, (%1, %%REGa))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
181 "add $8, %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
182 "jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
183
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
184 /*
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
185 :: "m" (-lumFilterSize), "m" (-chrFilterSize),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
186 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
187 "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
188 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
189 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
190 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
191 #define YSCALEYUV2PACKEDX \
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
192 asm volatile(\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
193 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
194 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
195 "nop \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
196 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
197 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
198 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
199 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
200 "movq %%mm3, %%mm4 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
201 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
202 "2: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
203 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
204 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
205 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
206 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
207 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
208 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
209 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
210 "paddw %%mm2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
211 "paddw %%mm5, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
212 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
213 " jnz 2b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
214 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
215 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
216 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
217 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
218 "movq %%mm1, %%mm7 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
219 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
220 "2: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
221 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
222 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
223 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
224 "add $16, %%"REG_d" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
225 "mov (%%"REG_d"), %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
226 "pmulhw %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
227 "pmulhw %%mm0, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
228 "paddw %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
229 "paddw %%mm5, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
230 "test %%"REG_S", %%"REG_S" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
231 " jnz 2b \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
232
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
233 #define YSCALEYUV2PACKEDX_END\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
234 :: "r" (&c->redDither), \
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
235 "m" (dummy), "m" (dummy), "m" (dummy),\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
236 "r" (dest), "m" (dstW)\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
237 : "%"REG_a, "%"REG_d, "%"REG_S\
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
238 );
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
239
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
240 #define YSCALEYUV2PACKEDX_ACCURATE \
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
241 asm volatile(\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
242 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
243 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
244 "nop \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
245 "1: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
246 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
247 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
248 "pxor %%mm4, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
249 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
250 "pxor %%mm6, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
251 "pxor %%mm7, %%mm7 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
252 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
253 "2: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
254 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
255 "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
256 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
257 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
258 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
259 "punpcklwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
260 "punpckhwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
261 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
262 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
263 "pmaddwd %%mm1, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
264 "paddd %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
265 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
266 "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
267 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
268 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
269 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
270 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
271 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
272 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
273 "pmaddwd %%mm1, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
274 "pmaddwd %%mm1, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
275 "paddd %%mm2, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
276 "paddd %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
277 " jnz 2b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
278 "psrad $16, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
279 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
280 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
281 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
282 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
283 "packssdw %%mm5, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
284 "packssdw %%mm7, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
285 "paddw %%mm0, %%mm4 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
286 "paddw %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
287 "movq %%mm4, "U_TEMP"(%0) \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
288 "movq %%mm6, "V_TEMP"(%0) \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
289 \
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
290 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
291 "mov (%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
292 "pxor %%mm1, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
293 "pxor %%mm5, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
294 "pxor %%mm7, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
295 "pxor %%mm6, %%mm6 \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
296 ASMALIGN(4)\
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
297 "2: \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
298 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
299 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
300 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
301 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
302 "movq %%mm0, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
303 "punpcklwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
304 "punpckhwd %%mm4, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
305 "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
306 "pmaddwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
307 "pmaddwd %%mm4, %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
308 "paddd %%mm0, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
309 "paddd %%mm3, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
310 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
311 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
312 "add $16, %%"REG_d" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
313 "test %%"REG_S", %%"REG_S" \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
314 "movq %%mm2, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
315 "punpcklwd %%mm3, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
316 "punpckhwd %%mm3, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
317 "pmaddwd %%mm4, %%mm2 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
318 "pmaddwd %%mm4, %%mm0 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
319 "paddd %%mm2, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
320 "paddd %%mm0, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
321 " jnz 2b \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
322 "psrad $16, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
323 "psrad $16, %%mm5 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
324 "psrad $16, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
325 "psrad $16, %%mm6 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
326 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
327 "packssdw %%mm5, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
328 "packssdw %%mm6, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
329 "paddw %%mm0, %%mm1 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
330 "paddw %%mm0, %%mm7 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
331 "movq "U_TEMP"(%0), %%mm3 \n\t"\
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
332 "movq "V_TEMP"(%0), %%mm4 \n\t"\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
333
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
334 #define YSCALEYUV2RGBX \
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
335 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
336 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
337 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
338 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
339 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
340 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
341 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
342 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
343 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
344 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
345 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
346 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
347 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
348 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
349 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
350 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
351 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
352 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
353 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
354 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
355 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
356 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
357 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
358 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
359 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
360 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
361 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
362 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
363 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
364 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
365 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
366 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
367 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
368 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
369 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
370 #if 0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
371 #define FULL_YSCALEYUV2RGB \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
372 "pxor %%mm7, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
373 "movd %6, %%mm6 \n\t" /*yalpha1*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
374 "punpcklwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
375 "punpcklwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
376 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
377 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
378 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
379 "xor %%"REG_a", %%"REG_a" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
380 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
381 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
382 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
383 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
384 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
385 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
386 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
387 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
388 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
389 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
390 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
391 "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
392 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
393 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
394 "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
395 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
396 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
397 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
398 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
399 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
400 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
401 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
402 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
403 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
404 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
405 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
406 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
407 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
408 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
409 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
410 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
411 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
412 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
413 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
414 "paddw %%mm1, %%mm3 \n\t" /* B*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
415 "paddw %%mm1, %%mm0 \n\t" /* R*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
416 "packuswb %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
417 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
418 "packuswb %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
419 "paddw %%mm4, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
420 "paddw %%mm2, %%mm1 \n\t" /* G*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
421 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
422 "packuswb %%mm1, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
423 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
424
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
425 #define REAL_YSCALEYUV2PACKED(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
426 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
427 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
428 "psraw $3, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
429 "psraw $3, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
430 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
431 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
432 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
433 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
434 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
435 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
436 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
437 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
438 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
439 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
440 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
441 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
442 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
443 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
444 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
445 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
446 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
447 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
448 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
449 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
450 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
451 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
452 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
453 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
454 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
455 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
456 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
457 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
458 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
459 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
460
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
461 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
462
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
463 #define REAL_YSCALEYUV2RGB(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
464 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
465 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
466 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
467 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
468 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
469 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
470 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
471 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
472 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
473 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
474 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
475 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
476 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
477 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
478 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
479 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
480 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
481 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
482 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
483 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
484 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
485 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
486 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
487 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
488 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
489 "movq 8(%0, "#index", 2), %%mm6\n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
490 "movq 8(%1, "#index", 2), %%mm7\n\t" /*buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
491 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
492 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
493 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
494 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
495 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
496 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
497 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
498 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
499 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
500 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
501 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
502 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
503 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
504 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
505 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
506 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
507 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
508 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
509 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
510 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
511 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
512 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
513 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
514 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
515 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
516 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
517 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
518 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
519 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
520 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
521 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
522 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
523 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
524 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
525 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
526 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: index and c are macro-expanded here before
 * REAL_YSCALEYUV2RGB applies the # operator to them, so an argument that is
 * itself a macro is stringized as its expansion, not as its own name. */
527 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
528
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment (adjacent string literals plus ASMALIGN) that opens a
 * loop and loads one line of samples without any vertical blending:
 *  - zeroes `index` and defines local label 1; the backward branch to that
 *    label is not part of this macro and must come from the code pasted
 *    after it,
 *  - loads chroma words through operand %2 (second plane 4096 bytes further)
 *    into mm3/mm4 and 8 luma words through operand %0 into mm1/mm7,
 *  - arithmetic-shifts every 16-bit word right by 7.
 * The `c` parameter is accepted but not used in this body.
 * NOTE: the last line ends in a line continuation, so the definition extends
 * through the blank line that follows; that blank line has to stay.
 */
529 #define REAL_YSCALEYUV2PACKED1(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
530 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
531 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
532 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
533 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
534 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
535 "psraw $7, %%mm3 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
536 "psraw $7, %%mm4 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
537 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
538 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
539 "psraw $7, %%mm1 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
540 "psraw $7, %%mm7 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
541
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: index and c are macro-expanded here before
 * REAL_YSCALEYUV2PACKED1 applies the # operator to index, so an argument that
 * is itself a macro is stringized as its expansion, not as its own name. */
542 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
543
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment (adjacent string literals plus ASMALIGN) converting
 * 8 pixels per loop iteration from a single luma line and a single chroma
 * line to RGB; there is no vertical blending with a second line.
 *  - zeroes `index` and defines local label 1; the backward branch is not
 *    part of this macro (the REAL_WRITEBGR* macros end in "jb 1b"),
 *  - reads chroma through operand %2 (second plane 4096 bytes further) and
 *    luma through operand %0; operands %1 and %3 are not referenced,
 *  - every constant (U_OFFSET, V_OFFSET, UG_COEFF, VG_COEFF, UB_COEFF,
 *    VR_COEFF, Y_OFFSET, Y_COEFF) is addressed relative to `c`.
 * On exit mm2, mm4 and mm5 hold B, G and R (per the register comments below)
 * packed to unsigned bytes with saturation (packuswb), and mm7 is cleared.
 */
544 #define REAL_YSCALEYUV2RGB1(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
545 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
546 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
547 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
548 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
549 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
550 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
551 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
552 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
553 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
554 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
555 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
556 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
557 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
558 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
559 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
560 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
561 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
562 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
563 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
564 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
565 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
566 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
567 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
568 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
569 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
570 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
571 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
572 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
573 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
574 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
575 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
576 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
577 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
578 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
579 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
580 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
581 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
582 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
583 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
584 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
585 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
586 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = pixels 0-3, 2 = pixels 4-7) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
587 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
588 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
589 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
590 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: index and c are macro-expanded here before
 * REAL_YSCALEYUV2RGB1 applies the # operator to them, so an argument that is
 * itself a macro is stringized as its expansion, not as its own name. */
591 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
592
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment (adjacent string literals plus ASMALIGN) that opens a
 * loop, sums two chroma lines and loads one luma line:
 *  - zeroes `index` and defines local label 1; the backward branch to that
 *    label is not part of this macro,
 *  - chroma: words read through operands %2 and %3 (second plane 4096 bytes
 *    further) are added and the sum is shifted right logically by 8, leaving
 *    the results in mm3/mm4,
 *  - luma: 8 words read through operand %0 only, arithmetic-shifted right by
 *    7, in mm1/mm7.
 * The `c` parameter is accepted but not used in this body.
 */
593 #define REAL_YSCALEYUV2PACKED1b(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
594 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
595 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
596 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
597 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
598 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
599 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
600 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
601 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
602 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
603 "psrlw $8, %%mm3 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
604 "psrlw $8, %%mm4 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
605 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
606 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
607 "psraw $7, %%mm1 \n\t" \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
608 "psraw $7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: index and c are macro-expanded here before
 * REAL_YSCALEYUV2PACKED1b applies the # operator to index, so an argument that
 * is itself a macro is stringized as its expansion, not as its own name. */
609 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
610
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
611 // do vertical chrominance interpolation
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment (adjacent string literals plus ASMALIGN) converting
 * 8 pixels per loop iteration to RGB from one luma line and the sum of two
 * chroma lines.
 *  - zeroes `index` and defines local label 1; the backward branch is not
 *    part of this macro (the REAL_WRITEBGR* macros end in "jb 1b"),
 *  - chroma: words read through operands %2 and %3 (second plane 4096 bytes
 *    further) are added and the sum is shifted right logically by 5; the
 *    16-bit addition is the step flagged "might overflow" below,
 *  - luma: read through operand %0 only and arithmetic-shifted right by 4,
 *  - every constant (U_OFFSET, V_OFFSET, UG_COEFF, VG_COEFF, UB_COEFF,
 *    VR_COEFF, Y_OFFSET, Y_COEFF) is addressed relative to `c`.
 * On exit mm2, mm4 and mm5 hold B, G and R (per the register comments below)
 * packed to unsigned bytes with saturation (packuswb), and mm7 is cleared.
 */
612 #define REAL_YSCALEYUV2RGB1b(index, c) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
613 "xor "#index", "#index" \n\t"\
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
614 ASMALIGN(4)\
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
615 "1: \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
616 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
617 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
618 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
619 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
620 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
621 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
622 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
623 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
624 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
625 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
626 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
627 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
628 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
629 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
630 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
631 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
632 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
633 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
634 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
635 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
636 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
637 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
638 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
639 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
640 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
641 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
642 "paddw %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
643 "movq %%mm2, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
644 "movq %%mm5, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
645 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
646 "punpcklwd %%mm2, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
647 "punpcklwd %%mm5, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
648 "punpcklwd %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
649 "paddw %%mm1, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
650 "paddw %%mm1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
651 "paddw %%mm1, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
652 "punpckhwd %%mm0, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
653 "punpckhwd %%mm6, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
654 "punpckhwd %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
655 "paddw %%mm7, %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
656 "paddw %%mm7, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
657 "paddw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
658 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = pixels 0-3, 2 = pixels 4-7) */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
659 "packuswb %%mm0, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
660 "packuswb %%mm6, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
661 "packuswb %%mm3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
662 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: index and c are macro-expanded here before
 * REAL_YSCALEYUV2RGB1b applies the # operator to them, so an argument that is
 * itself a macro is stringized as its expansion, not as its own name. */
663 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
664
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment that stores 8 pixels as 32-bit B,G,R,0 and closes the
 * loop opened at local label 1 by the macro pasted before it.
 * Entry state (see first comment below): mm2=B, mm4=G, mm5=R as packed bytes,
 * mm7=0. Registers mm0-mm3 and mm5/mm6 are overwritten.
 *  - byte/word unpacks interleave the three channels with the zero bytes of
 *    mm7 into four quadwords (2 pixels each),
 *  - the four MOVNTQ stores (macro defined elsewhere in this file) write them
 *    to dst + index*4 at byte offsets 0, 8, 16 and 24,
 *  - index is advanced by 8 and the loop repeats while index < dstw, compared
 *    as unsigned (jb).
 * The bound is tested only after a full 8-pixel group has been stored, so the
 * stores cover dstw rounded up to a multiple of 8 pixels.
 */
665 #define REAL_WRITEBGR32(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
666 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
667 "movq %%mm2, %%mm1 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
668 "movq %%mm5, %%mm6 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
669 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
670 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
671 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
672 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
673 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
674 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
675 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
676 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
677 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
678 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
679 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
680 MOVNTQ(%%mm0, (dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
681 MOVNTQ(%%mm2, 8(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
682 MOVNTQ(%%mm1, 16(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
683 MOVNTQ(%%mm3, 24(dst, index, 4))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
684 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
685 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
686 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
687 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: the arguments are macro-expanded here before
 * REAL_WRITEBGR32 stringizes index and dstw with #, so an argument that is
 * itself a macro is stringized as its expansion, not as its own name. */
688 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
689
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment that stores 8 pixels as 16-bit words and closes the
 * loop opened at local label 1 by the macro pasted before it.
 * Expects mm2=B, mm4=G, mm5=R as packed bytes (per the inline comments) and
 * mm7=0, since mm7 supplies the high byte when G is widened to words.
 *  - each channel is masked with the bF8 / bFC constants (defined elsewhere;
 *    presumably 0xF8 / 0xFC replicated per byte, i.e. keep the top 5 / 6 bits),
 *  - B is shifted right by 3, then interleaved bytewise with R so that R lands
 *    in the high byte of each word,
 *  - G is widened to words, shifted left by 3 and OR-ed in between them.
 * With those mask values each word is laid out as R:5 G:6 B:5.
 * Two MOVNTQ stores write 16 bytes at dst + index*2; index is advanced by 8
 * and the loop repeats while index < dstw, compared as unsigned (jb).
 */
690 #define REAL_WRITEBGR16(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
691 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
692 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
693 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
694 "psrlq $3, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
695 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
696 "movq %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
697 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
698 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
699 "punpcklbw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
700 "punpcklbw %%mm5, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
701 "punpckhbw %%mm7, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
702 "punpckhbw %%mm5, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
703 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
704 "psllq $3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
705 "psllq $3, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
706 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
707 "por %%mm3, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
708 "por %%mm4, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
709 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
710 MOVNTQ(%%mm2, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
711 MOVNTQ(%%mm1, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
712 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
713 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
714 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
715 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: the arguments are macro-expanded here before
 * REAL_WRITEBGR16 stringizes index and dstw with #, so an argument that is
 * itself a macro is stringized as its expansion, not as its own name. */
716 #define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
717
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * Asm template fragment that stores 8 pixels as 16-bit words and closes the
 * loop opened at local label 1 by the macro pasted before it.
 * Expects mm2=B, mm4=G, mm5=R as packed bytes (per the inline comments) and
 * mm7=0, since mm7 supplies the high byte when G is widened to words.
 *  - all three channels are masked with the bF8 constant (defined elsewhere;
 *    presumably 0xF8 replicated per byte, i.e. keep the top 5 bits),
 *  - B is shifted right by 3 and R right by 1, then they are interleaved
 *    bytewise so that R lands in the high byte of each word,
 *  - G is widened to words, shifted left by 2 and OR-ed in between them.
 * With that mask value each word is laid out as 0 R:5 G:5 B:5 (top bit clear).
 * Two MOVNTQ stores write 16 bytes at dst + index*2; index is advanced by 8
 * and the loop repeats while index < dstw, compared as unsigned (jb).
 */
718 #define REAL_WRITEBGR15(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
719 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
720 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
721 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
722 "psrlq $3, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
723 "psrlq $1, %%mm5 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
724 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
725 "movq %%mm2, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
726 "movq %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
727 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
728 "punpcklbw %%mm7, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
729 "punpcklbw %%mm5, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
730 "punpckhbw %%mm7, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
731 "punpckhbw %%mm5, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
732 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
733 "psllq $2, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
734 "psllq $2, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
735 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
736 "por %%mm3, %%mm2 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
737 "por %%mm4, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
738 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
739 MOVNTQ(%%mm2, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
740 MOVNTQ(%%mm1, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
741 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
742 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
743 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
744 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Extra expansion level: the arguments are macro-expanded here before
 * REAL_WRITEBGR15 stringizes index and dstw with #, so an argument that is
 * itself a macro is stringized as its expansion, not as its own name. */
745 #define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
746
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24OLD: inline-asm text fragment that takes 8 pixels held as separate
 * byte planes in mm2 (B), mm4 (G) and mm5 (R), with mm7 expected to be zero,
 * and packs them into three quadwords (24 bytes) that are stored through
 * MOVNTQ at dst, dst+8 and dst+16.
 *
 * The per-line comments show register contents most-significant byte first;
 * the trailing number is the index of the pixel pair being processed.
 * The constants bm00000111, bm11111000 and bm00001111 are read from memory
 * through MANGLE() and are defined outside this macro.
 *
 * Side effects visible here: mm0-mm6 are overwritten (mm7 is only read),
 * dst is advanced by 24 and index by 8. The final cmp/jb jumps back to the
 * nearest preceding local label "1" (not defined by this macro) while
 * index is below dstw (unsigned comparison).
 *
 * NOTE(review): the WRITEBGR24 selection further down only picks the MMX and
 * MMX2 variants, so this macro looks unused there -- confirm before removing.
 */
747 #define WRITEBGR24OLD(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
748 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
749 "movq %%mm2, %%mm1 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
750 "movq %%mm5, %%mm6 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
751 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
752 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
753 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
754 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
755 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
756 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
757 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
758 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
759 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
760 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
761 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
762 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
763 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
764 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
765 "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
766 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
767 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
768 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
769 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
770 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
771 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
772 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
773 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
774 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
775 "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
776 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
777 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
778 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
779 "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
780 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
781 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
782 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
783 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
784 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
785 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
786 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
787 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
788 "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
789 "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
790 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
791 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
792 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
793 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
794 MOVNTQ(%%mm0, (dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
795 MOVNTQ(%%mm2, 8(dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
796 MOVNTQ(%%mm3, 16(dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
797 "add $24, "#dst" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
798 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
799 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
800 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
801 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
802
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24MMX: BGR24 writer that uses only register shifts, unpacks and
 * ORs (no mask constants are read from memory). Input convention is the one
 * stated on the first line of the body: mm2=B, mm4=G, mm5=R, mm7=0.
 *
 * Outline of the visible steps:
 *   1. interleave B/G/R/0 into four "0RGB0RGB" quadwords (two pixels each)
 *      in mm0, mm2, mm1, mm3 and keep copies in mm4, mm6, mm5, mm7;
 *   2. shift left by 40 and punpckhdq with the copy to squeeze each pair
 *      into "0RGBRGB0";
 *   3. splice neighbouring pairs together and store the three resulting
 *      quadwords through MOVNTQ at dst, dst+8 and dst+16.
 *
 * Side effects visible here: all of mm0-mm7 are overwritten (mm7 no longer
 * holds zero afterwards), dst is advanced by 24 and index by 8. The final
 * cmp/jb jumps back to the nearest preceding local label "1" (not defined by
 * this macro) while index is below dstw (unsigned comparison).
 */
803 #define WRITEBGR24MMX(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
804 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
805 "movq %%mm2, %%mm1 \n\t" /* B */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
806 "movq %%mm5, %%mm6 \n\t" /* R */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
807 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
808 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
809 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
810 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
811 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
812 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
813 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
814 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
815 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
816 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
817 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
818 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
819 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
820 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
821 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
822 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
823 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
824 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
825 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
826 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
827 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
828 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
829 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
830 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
831 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
832 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
833 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
834 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
835 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
836 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
837 MOVNTQ(%%mm0, (dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
838 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
839 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
840 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
841 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
842 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
843 MOVNTQ(%%mm6, 8(dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
844 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
845 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
846 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
847 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
848 MOVNTQ(%%mm5, 16(dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
849 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
850 "add $24, "#dst" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
851 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
852 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
853 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
854 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
855
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * WRITEBGR24MMX2: BGR24 writer built on pshufw. Inputs are mm2=B, mm4=G and
 * mm5=R (one byte per pixel, 8 pixels). The body immediately loads the mask
 * M24A into mm0 and M24C into mm7, so whatever mm7 held on entry is not used.
 *
 * Each of the three output quadwords is composed the same way: pshufw
 * replicates the 16-bit words that contain the needed bytes of B, G and R,
 * the masks M24A / M24B / M24C (read through MANGLE(), defined elsewhere)
 * keep one byte position per channel, and por merges the three results into
 * mm6, which is stored through MOVNTQ at dst, dst+8 and dst+16.
 * G is shifted by one byte (psllq on the shuffled copy for the first
 * quadword, psrlq on mm4 itself afterwards) so that its bytes land between
 * the B and R bytes.
 *
 * Side effects visible here: mm0, mm1, mm3, mm4, mm6 and mm7 are overwritten;
 * mm2 and mm5 are only read. dst is advanced by 24 and index by 8. The final
 * cmp/jb jumps back to the nearest preceding local label "1" (not defined by
 * this macro) while index is below dstw (unsigned comparison).
 */
856 #define WRITEBGR24MMX2(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
857 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
858 "movq "MANGLE(M24A)", %%mm0 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
859 "movq "MANGLE(M24C)", %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
860 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
861 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
862 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
863 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
864 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
865 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
866 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
867 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
868 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
869 "por %%mm1, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
870 "por %%mm3, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
871 MOVNTQ(%%mm6, (dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
872 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
873 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
874 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
875 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
876 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
877 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
878 "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
879 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
880 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
881 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
882 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
883 "por %%mm3, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
884 MOVNTQ(%%mm6, 8(dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
885 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
886 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
887 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
888 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
889 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
890 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
891 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
892 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
893 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
894 "por %%mm1, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
895 "por %%mm3, %%mm6 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
896 MOVNTQ(%%mm6, 16(dst))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
897 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
898 "add $24, "#dst" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
899 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
900 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
901 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
902 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
903
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Select the BGR24 writer for this build: the pshufw-based WRITEBGR24MMX2
 * when HAVE_MMX2 is defined, the plain WRITEBGR24MMX otherwise. Any earlier
 * definition of WRITEBGR24 is dropped first -- presumably because this
 * template is included more than once with different settings (confirm
 * against the including file). */
904 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
905 #undef WRITEBGR24
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
906 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
907 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
908 #undef WRITEBGR24
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
909 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
910 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
911
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/*
 * REAL_WRITEYUY2 / WRITEYUY2: inline-asm text fragment that narrows the
 * 16-bit words in mm3, mm4 and the pair mm1:mm7 to bytes with unsigned
 * saturation (packuswb), interleaves them and stores 16 bytes through MOVNTQ
 * at dst + 2*index and dst + 2*index + 8.
 *
 * The resulting byte order is mm1[0], mm3[0], mm1[1], mm4[0], ... so,
 * judging by the YUY2 name, mm1/mm7 presumably carry luma, mm3 carries U and
 * mm4 carries V -- confirm against the macro that fills these registers.
 *
 * Side effects visible here: mm1, mm3, mm4 and mm7 are overwritten and index
 * is advanced by 8; dst itself is not modified. The final cmp/jb jumps back
 * to the nearest preceding local label "1" (not defined by this macro) while
 * index is below dstw (unsigned comparison).
 *
 * WRITEYUY2 only forwards to REAL_WRITEYUY2; presumably the extra level lets
 * the arguments be macro-expanded before '#' stringizes them.
 */
912 #define REAL_WRITEYUY2(dst, dstw, index) \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
913 "packuswb %%mm3, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
914 "packuswb %%mm4, %%mm4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
915 "packuswb %%mm7, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
916 "punpcklbw %%mm4, %%mm3 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
917 "movq %%mm1, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
918 "punpcklbw %%mm3, %%mm1 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
919 "punpckhbw %%mm3, %%mm7 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
920 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
921 MOVNTQ(%%mm1, (dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
922 MOVNTQ(%%mm7, 8(dst, index, 2))\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
923 \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
924 "add $8, "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
925 "cmp "#dstw", "#index" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
926 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
927 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
928
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
929
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Vertical scaler output for planar YUV: picks the implementation that
 * matches the build configuration and writes the luma plane to dest and,
 * when uDest is non-NULL, the chroma planes to uDest and vDest.
 *
 * - HAVE_MMX: expands the YSCALEYUV2YV12X_ACCURATE macros when
 *   c->flags has SWS_ACCURATE_RND set, the YSCALEYUV2YV12X macros otherwise.
 *   The filter/source arguments are not passed to the macros here; they only
 *   receive an offset, a *_MMX_FILTER_OFFSET constant, the destination and
 *   the width, so they presumably read the filter data from c -- confirm
 *   against the macro definitions.
 * - otherwise, HAVE_ALTIVEC: forwards to yuv2yuvX_altivec_real().
 * - otherwise: forwards to yuv2yuvXinC().
 *
 * @param c              scaler context (flags are read in the MMX path)
 * @param lumFilter      luma filter coefficients (forwarded in non-MMX paths)
 * @param lumSrc         luma source lines (forwarded in non-MMX paths)
 * @param lumFilterSize  number of luma filter taps (forwarded)
 * @param chrFilter      chroma filter coefficients (forwarded)
 * @param chrSrc         chroma source lines (forwarded)
 * @param chrFilterSize  number of chroma filter taps (forwarded)
 * @param dest           luma destination
 * @param uDest          U destination; NULL skips chroma in the MMX path
 * @param vDest          V destination
 * @param dstW           luma width
 * @param chrDstW        chroma width
 */
930 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
931 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
932 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
933 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
934 #ifdef HAVE_MMX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
935 if(c->flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
936 if(uDest){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
/* U uses offset 0, V uses offset 4096; presumably the byte offset of the V
 * samples inside the chroma buffer (yuv2yuv1 reads V at chrSrc + 2048
 * int16_t entries) -- confirm against the macro definition. */
937 YSCALEYUV2YV12X_ACCURATE( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
938 YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
939 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
940
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
941 YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
942 }else{
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
943 if(uDest){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
944 YSCALEYUV2YV12X( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
945 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
946 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
947
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
948 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
949 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
950 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
951 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
952 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
953 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
954 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
955 #else //HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
956 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
957 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
958 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
959 #endif //!HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
960 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
961 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
962
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Vertical scaler output for the NV12-style path: a thin wrapper that
 * forwards every argument except c to yuv2nv12XinC(). There is no
 * architecture-specific variant here, and c is not used by this wrapper.
 *
 * Note that dstW and chrDstW are plain int in this signature, whereas the
 * sibling yuv2yuvX takes them as long.
 *
 * @param c          scaler context (unused here)
 * @param dest       luma destination
 * @param uDest      chroma destination (single pointer; forwarded as is)
 * @param dstFormat  destination pixel format, forwarded to yuv2nv12XinC()
 */
963 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
964 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
965 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
966 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
967 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
968 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
969 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
970 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
971
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/**
 * Unfiltered output of one line to planar 8-bit YUV: every 16-bit sample of
 * lumSrc / chrSrc is reduced to 8 bits and written to dest / uDest / vDest.
 * The V samples are read from chrSrc starting at element 2048.
 *
 * - HAVE_MMX: runs the YSCALEYUV2YV121 asm fragment once per plane (chroma
 *   only when uDest is non-NULL). Source and destination pointers are passed
 *   already advanced by the width, together with the negated width, so the
 *   fragment presumably counts an index up from -width to 0 -- confirm
 *   against the macro definition. Each asm statement declares REG_a as
 *   clobbered.
 * - otherwise: plain C loops that shift each sample right by 7 and clip the
 *   result to 0..255.
 *
 * @param lumSrc   luma samples (int16_t), dstW entries
 * @param chrSrc   chroma samples: U at [0..chrDstW), V at [2048..2048+chrDstW)
 * @param dest     luma destination, dstW bytes
 * @param uDest    U destination, or NULL to skip both chroma planes
 * @param vDest    V destination
 * @param dstW     luma width
 * @param chrDstW  chroma width
 */
972 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
973 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
974 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
975 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
976 if(uDest != NULL)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
977 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* U plane */
978 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
979 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
980 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
981 "g" (-chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
982 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
983 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
984
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* V plane: same fragment, source shifted by 2048 int16_t entries */
985 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
986 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
987 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
988 "g" (-chrDstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
989 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
990 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
991 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
992
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* luma plane (always written) */
993 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
994 YSCALEYUV2YV121
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
995 :: "r" (lumSrc + dstW), "r" (dest + dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
996 "g" (-dstW)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
997 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
998 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
999 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1000 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1001 for(i=0; i<dstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1002 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1003 int val= lumSrc[i]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1004
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* Bit 8 set means val is outside 0..255: negative values clip to 0,
 * anything else to 255. The single bit test keeps the common in-range
 * case to one branch. */
1005 if(val&256){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1006 if(val<0) val=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1007 else val=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1008 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1009
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1010 dest[i]= val;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1011 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1012
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1013 if(uDest != NULL)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1014 for(i=0; i<chrDstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1015 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1016 int u=chrSrc[i]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1017 int v=chrSrc[i + 2048]>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1018
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
/* One combined bit-8 test for both samples; the individual range
 * checks run only when at least one of them is out of 0..255. */
1019 if((u|v)&256){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1020 if(u<0) u=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1021 else if (u>255) u=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1022 if(v<0) v=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1023 else if (v>255) v=255;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1024 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1025
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1026 uDest[i]= u;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1027 vDest[i]= v;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1028 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1029 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1030 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1031
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1032
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1033 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1034 * vertical scale YV12 to RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1035 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1036 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1037 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1038 uint8_t *dest, long dstW, long dstY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1039 {
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1040 #ifdef HAVE_MMX
20015
d08ba4508bb0 Fix unused variable warning when compiling with MMX disabled.
diego
parents: 19872
diff changeset
1041 long dummy=0;
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1042 if(c->flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1043 switch(c->dstFormat){
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1044 case PIX_FMT_RGB32:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1045 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1046 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1047 WRITEBGR32(%4, %5, %%REGa)
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1048
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1049 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1050 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1051 case PIX_FMT_BGR24:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1052 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1053 YSCALEYUV2RGBX
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1054 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1055 "add %4, %%"REG_c" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1056 WRITEBGR24(%%REGc, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1057
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1058
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1059 :: "r" (&c->redDither),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1060 "m" (dummy), "m" (dummy), "m" (dummy),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1061 "r" (dest), "m" (dstW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1062 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1063 );
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1064 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1065 case PIX_FMT_BGR555:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1066 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1067 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1068 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1069 #ifdef DITHER1XBPP
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1070 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1071 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1072 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1073 #endif
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1074
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1075 WRITEBGR15(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1076 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1077 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1078 case PIX_FMT_BGR565:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1079 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1080 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1081 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1082 #ifdef DITHER1XBPP
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1083 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1084 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1085 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1086 #endif
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1087
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1088 WRITEBGR16(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1089 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1090 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1091 case PIX_FMT_YUYV422:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1092 YSCALEYUV2PACKEDX_ACCURATE
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
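1093 /* no YSCALEYUV2RGBX in this branch, so the B/G/R register mapping does not apply; mm3, mm4, mm1 and mm7 are shifted right by 3 before WRITEYUY2 (presumably chroma and luma words -- confirm against YSCALEYUV2PACKEDX_ACCURATE) */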
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1094
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1095 "psraw $3, %%mm3 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1096 "psraw $3, %%mm4 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1097 "psraw $3, %%mm1 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1098 "psraw $3, %%mm7 \n\t"
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1099 WRITEYUY2(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1100 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1101 return;
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1102 }
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1103 }else{
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1104 switch(c->dstFormat)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1105 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1106 case PIX_FMT_RGB32:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1107 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1108 YSCALEYUV2RGBX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1109 WRITEBGR32(%4, %5, %%REGa)
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1110 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1111 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1112 case PIX_FMT_BGR24:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1113 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1114 YSCALEYUV2RGBX
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1115 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1116 "add %4, %%"REG_c" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1117 WRITEBGR24(%%REGc, %5, %%REGa)
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1118
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1119 :: "r" (&c->redDither),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1120 "m" (dummy), "m" (dummy), "m" (dummy),
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1121 "r" (dest), "m" (dstW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1122 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1123 );
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1124 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1125 case PIX_FMT_BGR555:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1126 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1127 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1128 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1129 #ifdef DITHER1XBPP
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1130 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1131 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1132 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1133 #endif
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1134
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1135 WRITEBGR15(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1136 YSCALEYUV2PACKEDX_END
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1137 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1138 case PIX_FMT_BGR565:
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1139 YSCALEYUV2PACKEDX
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1140 YSCALEYUV2RGBX
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1141 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1142 #ifdef DITHER1XBPP
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1143 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1144 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1145 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1146 #endif
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1147
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1148 WRITEBGR16(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1149 YSCALEYUV2PACKEDX_END
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1150 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1151 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1152 YSCALEYUV2PACKEDX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
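1153 /* no YSCALEYUV2RGBX in this branch, so the B/G/R register mapping does not apply; mm3, mm4, mm1 and mm7 are shifted right by 3 before WRITEYUY2 (presumably chroma and luma words -- confirm against YSCALEYUV2PACKEDX) */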
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1154
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1155 "psraw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1156 "psraw $3, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1157 "psraw $3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1158 "psraw $3, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1159 WRITEYUY2(%4, %5, %%REGa)
19173
dbdc58b6e9bb a tiny bit of cleanup
michael
parents: 19172
diff changeset
1160 YSCALEYUV2PACKEDX_END
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1161 return;
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1162 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
1163 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1164 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1165 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1166 /* The following list of supported dstFormat values should
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1167 match what's found in the body of altivec_yuv2packedX() */
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1168 if(c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1169 c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1170 c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1171 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1172 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1173 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1174 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1175 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1176 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1177 chrFilter, chrSrc, chrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1178 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1179 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1180
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1181 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1182 * vertical bilinear scale YV12 to RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1183 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1184 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1185 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1186 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1187 int yalpha1=yalpha^4095;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1188 int uvalpha1=uvalpha^4095;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1189 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1190
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1191 #if 0 //isn't used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1192 if(flags&SWS_FULL_CHR_H_INT)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1193 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1194 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1195 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1196 #ifdef HAVE_MMX
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1197 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1198 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1199
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1200
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1201 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1202 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1203 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1204
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1205 "movq %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1206 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1207 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1208
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1209 MOVNTQ(%%mm3, (%4, %%REGa, 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1210 MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1211
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1212 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1213 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1214 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1215
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1216
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1217 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1218 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1219 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1220 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1221 break;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1222 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1223 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1224
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1225 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1226
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1227 // lsb ... msb
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1228 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1229 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1230
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1231 "movq %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1232 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1233 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1234
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1235 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1236 "psrlq $8, %%mm3 \n\t" // GR0BGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1237 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1238 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1239 "por %%mm2, %%mm3 \n\t" // BGRBGR00
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1240 "movq %%mm1, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1241 "psllq $48, %%mm1 \n\t" // 000000BG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1242 "por %%mm1, %%mm3 \n\t" // BGRBGRBG
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1243
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1244 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1245 "psrld $16, %%mm2 \n\t" // R000R000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1246 "psrlq $24, %%mm1 \n\t" // 0BGR0000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1247 "por %%mm2, %%mm1 \n\t" // RBGRR000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1248
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1249 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1250 "add %%"REG_a", %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1251
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1252 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1253 //FIXME Alignment
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1254 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1255 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1256 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1257 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1258 "psrlq $32, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1259 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1260 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1261 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1262 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1263 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1264 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1265
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1266 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1267 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1268 : "%"REG_a, "%"REG_b
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1269 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1270 break;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1271 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1272 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1273
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1274 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1275 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1276 "paddusb "MANGLE(g5Dither)", %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1277 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1278 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1279 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1280 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1281 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1282 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1283
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1284 "psrlw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1285 "psllw $2, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1286 "psllw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1287 "pand "MANGLE(g15Mask)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1288 "pand "MANGLE(r15Mask)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1289
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1290 "por %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1291 "por %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1292
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1293 MOVNTQ(%%mm0, (%4, %%REGa, 2))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1294
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1295 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1296 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1297 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1298
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1299 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1300 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1301 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1302 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1303 break;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1304 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1305 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1306
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1307 FULL_YSCALEYUV2RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1308 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1309 "paddusb "MANGLE(g6Dither)", %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1310 "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1311 "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1312 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1313 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1314 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1315 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1316
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1317 "psrlw $3, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1318 "psllw $3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1319 "psllw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1320 "pand "MANGLE(g16Mask)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1321 "pand "MANGLE(r16Mask)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1322
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1323 "por %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1324 "por %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1325
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1326 MOVNTQ(%%mm0, (%4, %%REGa, 2))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1327
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1328 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1329 "cmp %5, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1330 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1331
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1332 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1333 "m" (yalpha1), "m" (uvalpha1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1334 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1335 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1336 break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1337 #endif
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1338 case PIX_FMT_BGR32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1339 #ifndef HAVE_MMX
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1340 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1341 #endif
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1342 if(dstFormat==PIX_FMT_RGB32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1343 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1344 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1345 #ifdef WORDS_BIGENDIAN
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1346 dest++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1347 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1348 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1349 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1350 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1351 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1352 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1353 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1354 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1355 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1356 dest+= 4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1357 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1358 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1359 else if(dstFormat==PIX_FMT_BGR24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1360 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1361 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1362 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1363 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1364 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1365 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1366 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1367 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1368 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1369 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1370 dest+= 3;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1371 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1372 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1373 else if(dstFormat==PIX_FMT_BGR565)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1374 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1375 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1376 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1377 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1378 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1379 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1380 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1381
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1382 ((uint16_t*)dest)[i] =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1383 clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1384 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1385 clip_table16r[(Y + yuvtab_3343[V]) >>13];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1386 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1387 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1388 else if(dstFormat==PIX_FMT_BGR555)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1389 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1390 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1391 for(i=0;i<dstW;i++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1392 // vertical linear interpolation && yuv2rgb in a single step:
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1393 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1394 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1395 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1396
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1397 ((uint16_t*)dest)[i] =
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1398 clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1399 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1400 clip_table15r[(Y + yuvtab_3343[V]) >>13];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1401 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1402 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1403 }//FULL_UV_IPOL
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1404 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1405 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1406 #endif // if 0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1407 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1408 switch(c->dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1409 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1410 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1411 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1412 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1413 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1414 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1415 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1416 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1417 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1418 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1419 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1420
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1421 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1422 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1423 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1424 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1425 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1426 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1427 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1428 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1429 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1430 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1431 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1432 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1433 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1434 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1435 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1436 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1437 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1438 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1439 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1440 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1441 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1442 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1443 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1444 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1445 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1446 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1447 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1448 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1449 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1450
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1451 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1452 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1453 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1454
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1455 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1456 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1457 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1458 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1459 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1460 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1461 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1462 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1463 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1464 YSCALEYUV2RGB(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1465 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1466 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1467 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1468 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1469 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1470 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1471
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1472 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1473 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1474 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1475 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1476 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1477 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1478 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1479 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1480 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1481 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1482 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1483 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1484 YSCALEYUV2PACKED(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1485 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1486 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1487 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1488 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1489 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1490 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1491 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1492 default: break;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1493 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1494 #endif //HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1495 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1496 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1497
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1498 /**
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1499 * YV12 to RGB without scaling or interpolating
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1500 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1501 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1502 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1503 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1504 const int yalpha1=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1505 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1506
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1507 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1508 const int yalpha= 4096; //FIXME ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1509
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1510 if(flags&SWS_FULL_CHR_H_INT)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1511 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1512 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1513 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1514 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1515
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1516 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1517 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1518 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1519 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1520 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1521 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1522 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1523 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1524 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1525 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1526 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1527 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1528 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1529 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1530
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1531 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1532 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1533 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1534 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1535 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1536 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1537 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1538 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1539 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1540 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1541 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1542 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1543 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1544
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1545 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1546 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1547 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1548 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1549 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1550 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1551 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1552 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1553 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1554 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1555 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1556 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1557 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1558 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1559 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1560 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1561 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1562 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1563 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1564
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1565 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1566 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1567 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1568 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1569 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1570 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1571 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1572 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1573 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1574 YSCALEYUV2RGB1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1575 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1576 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1577 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1578 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1579 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1580 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1581
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1582 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1583 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1584 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1585
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1586 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1587 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1588 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1589 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1590 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1591 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1592 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1593 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1594 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1595 YSCALEYUV2PACKED1(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1596 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1597 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1598 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1599
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1600 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1601 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1602 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1603 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1604 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1605 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1606 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1607 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1608 switch(dstFormat)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1609 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1610 case PIX_FMT_RGB32:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1611 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1612 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1613 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1614 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1615 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1616 WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1617 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1618 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1619
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1620 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1621 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1622 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1623 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1624 case PIX_FMT_BGR24:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1625 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1626 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1627 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1628 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1629 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1630 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1631 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1632 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1633
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1634 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1635 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1636 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1637 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1638 case PIX_FMT_BGR555:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1639 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1640 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1641 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1642 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1643 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1644 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1645 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1646 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1647 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1648 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1649 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1650 WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1651 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1652 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1653
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1654 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1655 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1656 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1657 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1658 case PIX_FMT_BGR565:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1659 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1660 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1661 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1662 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1663 YSCALEYUV2RGB1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1664 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1665 #ifdef DITHER1XBPP
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1666 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1667 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1668 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1669 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1670
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1671 WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1672 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1673 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1674
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1675 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1676 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1677 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1678 return;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
1679 case PIX_FMT_YUYV422:
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1680 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1681 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1682 "mov %4, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1683 "push %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1684 YSCALEYUV2PACKED1b(%%REGBP, %5)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1685 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1686 "pop %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1687 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1688
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1689 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1690 "a" (&c->redDither)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1691 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1692 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1693 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1694 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1695 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1696 if( uvalpha < 2048 )
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1697 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1698 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1699 }else{
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1700 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1701 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1702 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1703
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1704 //FIXME yuy2* can read up to 7 samples too many
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1705
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1706 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1707 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1708 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1709 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1710 "movq "MANGLE(bm01010101)", %%mm2\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1711 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1712 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1713 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1714 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1715 "pand %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1716 "pand %%mm2, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1717 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1718 "movq %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1719 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1720 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1721 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1722 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1723 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1724 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1725 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1726 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1727 dst[i]= src[2*i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1728 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1729 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1730
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1731 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1732 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1733 #ifdef HAVE_MMX
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1734 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1735 "movq "MANGLE(bm01010101)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1736 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1737 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1738 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1739 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1740 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1741 "psrlw $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1742 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1743 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1744 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1745 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1746 "packuswb %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1747 "packuswb %%mm1, %%mm1 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1748 "movd %%mm0, (%3, %%"REG_a") \n\t"
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1749 "movd %%mm1, (%2, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1750 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1751 " js 1b \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1752 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1753 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1754 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1755 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1756 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1757 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1758 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1759 dstU[i]= src1[4*i + 1];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1760 dstV[i]= src1[4*i + 3];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1761 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1762 #endif
21686
79bb02931e40 Fix compilation of non-MMX code with gcc 2.95
lucabe
parents: 21325
diff changeset
1763 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1764 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1765
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1766 //this is almost identical to the previous, and exists only because RENAME(yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1767 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1768 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1769 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1770 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1771 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1772 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1773 "movq (%1, %%"REG_a",2), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1774 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1775 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1776 "psrlw $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1777 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1778 "movq %%mm0, (%2, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1779 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1780 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1781 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1782 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1783 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1784 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1785 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1786 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1787 dst[i]= src[2*i+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1788 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1789 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1790
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1791 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1792 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1793 #ifdef HAVE_MMX
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1794 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1795 "movq "MANGLE(bm01010101)", %%mm4\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1796 "mov %0, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1797 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1798 "movq (%1, %%"REG_a",4), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1799 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1800 "pand %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1801 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1802 "packuswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1803 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1804 "psrlw $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1805 "pand %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1806 "packuswb %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1807 "packuswb %%mm1, %%mm1 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1808 "movd %%mm0, (%3, %%"REG_a") \n\t"
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1809 "movd %%mm1, (%2, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1810 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1811 " js 1b \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1812 : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1813 : "%"REG_a
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1814 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1815 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1816 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1817 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1818 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1819 dstU[i]= src1[4*i + 0];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1820 dstV[i]= src1[4*i + 2];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1821 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1822 #endif
21686
79bb02931e40 Fix compilation of non-MMX code with gcc 2.95
lucabe
parents: 21325
diff changeset
1823 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1824 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1825
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1826 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1827 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1828 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1829 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1830 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1831 int b= ((uint32_t*)src)[i]&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1832 int g= (((uint32_t*)src)[i]>>8)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1833 int r= (((uint32_t*)src)[i]>>16)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1834
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1835 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1836 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1837 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1838
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1839 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1840 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1841 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1842 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1843 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1844 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1845 const int a= ((uint32_t*)src1)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1846 const int e= ((uint32_t*)src1)[2*i+1];
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1847 const int l= (a&0xFF00FF) + (e&0xFF00FF);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1848 const int h= (a&0x00FF00) + (e&0x00FF00);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1849 const int b= l&0x3FF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1850 const int g= h>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1851 const int r= l>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1852
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1853 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1854 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1855 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1856 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1857
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1858 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1859 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1860 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1861 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1862 "mov %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1863 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1864 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1865 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1866 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
1867 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1868 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1869 PREFETCH" 64(%0, %%"REG_d") \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1870 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1871 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1872 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1873 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1874 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1875 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1876 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1877 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1878 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1879 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1880 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1881 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1882 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1883 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1884 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1885 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1886 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1887 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1888 "packssdw %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1889 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1890 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1891 "pmaddwd %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1892 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1893 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1894
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1895 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1896 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1897 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1898 "punpcklbw %%mm7, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1899 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1900 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1901 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1902 "punpcklbw %%mm7, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1903 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1904 "pmaddwd %%mm6, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1905 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1906 "pmaddwd %%mm6, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1907 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1908 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1909 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1910 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1911 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1912 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1913 "packssdw %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1914 "packssdw %%mm3, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1915 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1916 "pmaddwd %%mm5, %%mm2 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1917 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1918 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1919 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1920
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1921 "packuswb %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1922 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1923
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1924 "movq %%mm0, (%1, %%"REG_a") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1925 "add $8, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1926 " js 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1927 : : "r" (src+width*3), "r" (dst+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1928 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1929 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1930 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1931 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1932 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1933 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1934 int b= src[i*3+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1935 int g= src[i*3+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1936 int r= src[i*3+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1937
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1938 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1939 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1940 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1941 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1942
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1943 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1944 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1945 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1946 asm volatile(
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1947 "mov %3, %%"REG_a" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1948 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1949 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1950 "pxor %%mm7, %%mm7 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1951 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1952 "add %%"REG_d", %%"REG_d" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
1953 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1954 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1955 PREFETCH" 64(%0, %%"REG_d") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1956 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1957 "movq (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1958 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1959 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1960 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1961 "psrlq $24, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1962 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1963 PAVGB(%%mm1, %%mm0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1964 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1965 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1966 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1967 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1968 "movd (%0, %%"REG_d"), %%mm0 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1969 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1970 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1971 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1972 "paddw %%mm2, %%mm0 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1973 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
1974 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1975 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1976 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1977 "paddw %%mm4, %%mm2 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1978 "psrlw $1, %%mm0 \n\t"
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
1979 "psrlw $1, %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1980 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1981 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1982 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1983
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1984 "pmaddwd %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1985 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1986 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1987 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1988 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1989 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1990 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1991 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1992 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1993 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1994 "packssdw %%mm2, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1995 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1996 "pmaddwd %%mm5, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1997 "pmaddwd %%mm5, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1998 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
1999 "psraw $7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2000
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2001 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2002 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2003 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2004 "movq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2005 "movq %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2006 "psrlq $24, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2007 "psrlq $24, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2008 PAVGB(%%mm1, %%mm4)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2009 PAVGB(%%mm3, %%mm2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2010 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2011 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2012 #else
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2013 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2014 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2015 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2016 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2017 "paddw %%mm2, %%mm4 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2018 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2019 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2020 "punpcklbw %%mm7, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2021 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2022 "paddw %%mm5, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2023 "movq "MANGLE(w1111)", %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2024 "psrlw $2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2025 "psrlw $2, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2026 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2027 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2028 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2029
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2030 "pmaddwd %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2031 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2032 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2033 "pmaddwd %%mm6, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2034 #ifndef FAST_BGR2YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2035 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2036 "psrad $8, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2037 "psrad $8, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2038 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2039 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2040 "packssdw %%mm2, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2041 "packssdw %%mm3, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2042 "pmaddwd %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2043 "pmaddwd %%mm5, %%mm1 \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2044 "add $24, %%"REG_d" \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2045 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2046 "psraw $7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2047
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2048 "movq %%mm0, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2049 "punpckldq %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2050 "punpckhdq %%mm4, %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2051 "packsswb %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2052 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2053
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2054 "movd %%mm0, (%1, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2055 "punpckhdq %%mm0, %%mm0 \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2056 "movd %%mm0, (%2, %%"REG_a") \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2057 "add $4, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2058 " js 1b \n\t"
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2059 : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2060 : "%"REG_a, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2061 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2062 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2063 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2064 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2065 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2066 int b= src1[6*i + 0] + src1[6*i + 3];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2067 int g= src1[6*i + 1] + src1[6*i + 4];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2068 int r= src1[6*i + 2] + src1[6*i + 5];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2069
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2070 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2071 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2072 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2073 #endif
21686
79bb02931e40 Fix compilation of non-MMX code with gcc 2.95
lucabe
parents: 21325
diff changeset
2074 assert(src1 == src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2075 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2076
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2077 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2078 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2079 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2080 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2081 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2082 int d= ((uint16_t*)src)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2083 int b= d&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2084 int g= (d>>5)&0x3F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2085 int r= (d>>11)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2086
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2087 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2088 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2089 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2090
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2091 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2092 {
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2093 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2094 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2095 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2096 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2097 int d0= ((uint32_t*)src1)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2098
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2099 int dl= (d0&0x07E0F81F);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2100 int dh= ((d0>>5)&0x07C0F83F);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2101
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2102 int dh2= (dh>>11) + (dh<<21);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2103 int d= dh2 + dl;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2104
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2105 int b= d&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2106 int r= (d>>11)&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2107 int g= d>>21;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2108 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2109 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2110 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2111 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2112
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2113 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2114 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2115 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2116 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2117 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2118 int d= ((uint16_t*)src)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2119 int b= d&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2120 int g= (d>>5)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2121 int r= (d>>10)&0x1F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2122
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2123 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2124 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2125 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2126
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2127 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2128 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2129 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2130 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2131 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2132 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2133 int d0= ((uint32_t*)src1)[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2134
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2135 int dl= (d0&0x03E07C1F);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2136 int dh= ((d0>>5)&0x03E0F81F);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2137
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2138 int dh2= (dh>>11) + (dh<<21);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2139 int d= dh2 + dl;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2140
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2141 int b= d&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2142 int r= (d>>10)&0x7F;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2143 int g= d>>21;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2144 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2145 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2146 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2147 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2148
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2149
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2150 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2151 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2152 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2153 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2154 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2155 int r= ((uint32_t*)src)[i]&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2156 int g= (((uint32_t*)src)[i]>>8)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2157 int b= (((uint32_t*)src)[i]>>16)&0xFF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2158
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2159 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2160 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2161 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2162
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2163 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2164 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2165 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2166 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2167 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2168 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2169 const int a= ((uint32_t*)src1)[2*i+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2170 const int e= ((uint32_t*)src1)[2*i+1];
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2171 const int l= (a&0xFF00FF) + (e&0xFF00FF);
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2172 const int h= (a&0x00FF00) + (e&0x00FF00);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2173 const int r= l&0x3FF;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2174 const int g= h>>8;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2175 const int b= l>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2176
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2177 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2178 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2179 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2180 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2181
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2182 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2183 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2184 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2185 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2186 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2187 int r= src[i*3+0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2188 int g= src[i*3+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2189 int b= src[i*3+2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2190
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2191 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2192 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2193 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2194
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2195 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2196 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2197 int i;
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2198 assert(src1==src2);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2199 for(i=0; i<width; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2200 {
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2201 int r= src1[6*i + 0] + src1[6*i + 3];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2202 int g= src1[6*i + 1] + src1[6*i + 4];
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2203 int b= src1[6*i + 2] + src1[6*i + 5];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2204
20945
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2205 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
92150c16e737 fixing the lamest bug in swscale, all the rgb/bgr->* code did 2x2 downsampling for chroma, it should just be 2x1 (the rest of the code also belived its 2x1 ...)
michael
parents: 20724
diff changeset
2206 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2207 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2208 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2209
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2210 static inline void RENAME(rgb16ToY)(uint8_t *dst, uint8_t *src, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2211 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2212 int i;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2213 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2214 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2215 int d= ((uint16_t*)src)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2216 int r= d&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2217 int g= (d>>5)&0x3F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2218 int b= (d>>11)&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2219
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2220 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2221 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2222 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2223
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2224 static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2225 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2226 int i;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2227 assert(src1 == src2);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2228 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2229 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2230 int d0= ((uint32_t*)src1)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2231
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2232 int dl= (d0&0x07E0F81F);
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2233 int dh= ((d0>>5)&0x07C0F83F);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2234
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2235 int dh2= (dh>>11) + (dh<<21);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2236 int d= dh2 + dl;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2237
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2238 int r= d&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2239 int b= (d>>11)&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2240 int g= d>>21;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2241 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2242 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2243 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2244 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2245
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2246 static inline void RENAME(rgb15ToY)(uint8_t *dst, uint8_t *src, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2247 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2248 int i;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2249 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2250 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2251 int d= ((uint16_t*)src)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2252 int r= d&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2253 int g= (d>>5)&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2254 int b= (d>>10)&0x1F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2255
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2256 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2257 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2258 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2259
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2260 static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2261 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2262 int i;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2263 assert(src1 == src2);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2264 for(i=0; i<width; i++)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2265 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2266 int d0= ((uint32_t*)src1)[i];
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2267
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2268 int dl= (d0&0x03E07C1F);
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2269 int dh= ((d0>>5)&0x03E0F81F);
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2270
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2271 int dh2= (dh>>11) + (dh<<21);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2272 int d= dh2 + dl;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2273
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2274 int g= d&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2275 int r= (d>>10)&0x7F;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2276 int b= d>>21;
20946
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2277 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
bb4c952bc52c forgotten 2 converters (yeah svn up, svn di svn ci isnt enough i should actually look at the code after svn up not just the diff ...)
michael
parents: 20945
diff changeset
2278 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2279 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2280 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2281
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2282 static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, int width, uint32_t *pal)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2283 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2284 int i;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2285 for(i=0; i<width; i++)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2286 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2287 int d= src[i];
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2288 int b= pal[d] &0xFF;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2289 int g=(pal[d]>>8 )&0xFF;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2290 int r= pal[d]>>16;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2291
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2292 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2293 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2294 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2295
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2296 static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width, uint32_t *pal)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2297 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2298 int i;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2299 assert(src1 == src2);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2300 for(i=0; i<width; i++)
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2301 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2302 int d0= src1[2*i ];
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2303 int d1= src1[2*i+1];
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2304 int p = (pal[d0]&0xFF00FF) + (pal[d1]&0xFF00FF);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2305 int g = (pal[d0]+pal[d1]-p)>>8;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2306 int b= p&0x1FF;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2307 int r= p>>16;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2308
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2309 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2310 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2311 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2312 }
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2313
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2314 // Bilinear / Bicubic scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2315 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2316 int16_t *filter, int16_t *filterPos, long filterSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2317 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2318 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2319 assert(filterSize % 4 == 0 && filterSize>0);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2320 if(filterSize==4) // allways true for upscaling, sometimes for down too
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2321 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2322 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2323 filter-= counter*2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2324 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2325 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2326 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2327 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2328 "push %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2329 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2330 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2331 "movq "MANGLE(w02)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2332 "push %%"REG_BP" \n\t" // we use 7 regs here ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2333 "mov %%"REG_a", %%"REG_BP" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2334 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2335 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2336 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2337 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2338 "movq (%1, %%"REG_BP", 4), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2339 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2340 "movd (%3, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2341 "movd (%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2342 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2343 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2344 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2345 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2346 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2347 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2348 "packssdw %%mm3, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2349 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2350 "packssdw %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2351 "movd %%mm0, (%4, %%"REG_BP") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2352 "add $4, %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2353 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2354
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2355 "pop %%"REG_BP" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2356 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2357 "pop %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2358 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2359 : "+a" (counter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2360 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2361 #if !defined(PIC)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2362 : "%"REG_b
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2363 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2364 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2365 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2366 else if(filterSize==8)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2367 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2368 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2369 filter-= counter*4;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2370 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2371 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2372 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2373 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2374 "push %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2375 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2376 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2377 "movq "MANGLE(w02)", %%mm6 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2378 "push %%"REG_BP" \n\t" // we use 7 regs here ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2379 "mov %%"REG_a", %%"REG_BP" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2380 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2381 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2382 "movzwl (%2, %%"REG_BP"), %%eax \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2383 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2384 "movq (%1, %%"REG_BP", 8), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2385 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2386 "movd (%3, %%"REG_a"), %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2387 "movd (%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2388 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2389 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2390 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2391 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2392
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2393 "movq 8(%1, %%"REG_BP", 8), %%mm1\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2394 "movq 24(%1, %%"REG_BP", 8), %%mm5\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2395 "movd 4(%3, %%"REG_a"), %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2396 "movd 4(%3, %%"REG_b"), %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2397 "punpcklbw %%mm7, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2398 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2399 "pmaddwd %%mm1, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2400 "pmaddwd %%mm2, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2401 "paddd %%mm4, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2402 "paddd %%mm5, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2403
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2404 "psrad $8, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2405 "psrad $8, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2406 "packssdw %%mm3, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2407 "pmaddwd %%mm6, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2408 "packssdw %%mm0, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2409 "movd %%mm0, (%4, %%"REG_BP") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2410 "add $4, %%"REG_BP" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2411 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2412
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2413 "pop %%"REG_BP" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2414 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2415 "pop %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2416 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2417 : "+a" (counter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2418 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2419 #if !defined(PIC)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2420 : "%"REG_b
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2421 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2422 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2423 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2424 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2425 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2426 uint8_t *offset = src+filterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2427 long counter= -2*dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2428 // filter-= counter*filterSize/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2429 filterPos-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2430 dst-= counter/2;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2431 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2432 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2433 "movq "MANGLE(w02)", %%mm6 \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2434 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2435 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2436 "mov %2, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2437 "movzwl (%%"REG_c", %0), %%eax \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2438 "movzwl 2(%%"REG_c", %0), %%edx \n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2439 "mov %5, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2440 "pxor %%mm4, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2441 "pxor %%mm5, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2442 "2: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2443 "movq (%1), %%mm1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2444 "movq (%1, %6), %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2445 "movd (%%"REG_c", %%"REG_a"), %%mm0\n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2446 "movd (%%"REG_c", %%"REG_d"), %%mm2\n\t"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2447 "punpcklbw %%mm7, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2448 "punpcklbw %%mm7, %%mm2 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2449 "pmaddwd %%mm1, %%mm0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2450 "pmaddwd %%mm2, %%mm3 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2451 "paddd %%mm3, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2452 "paddd %%mm0, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2453 "add $8, %1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2454 "add $4, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2455 "cmp %4, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2456 " jb 2b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2457 "add %6, %1 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2458 "psrad $8, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2459 "psrad $8, %%mm5 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2460 "packssdw %%mm5, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2461 "pmaddwd %%mm6, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2462 "packssdw %%mm4, %%mm4 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2463 "mov %3, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2464 "movd %%mm4, (%%"REG_a", %0) \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2465 "add $4, %0 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2466 " jnc 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2467
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2468 : "+r" (counter), "+r" (filter)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2469 : "m" (filterPos), "m" (dst), "m"(offset),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2470 "m" (src), "r" (filterSize*2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2471 : "%"REG_a, "%"REG_c, "%"REG_d
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2472 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2473 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2474 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2475 #ifdef HAVE_ALTIVEC
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2476 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2477 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2478 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2479 for(i=0; i<dstW; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2480 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2481 int j;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2482 int srcPos= filterPos[i];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2483 int val=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2484 // printf("filterPos: %d\n", filterPos[i]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2485 for(j=0; j<filterSize; j++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2486 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2487 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2488 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2489 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2490 // filter += hFilterSize;
22321
5f47768cbda5 Add av_ prefix to clip functions
reimar
parents: 22226
diff changeset
2491 dst[i] = av_clip(val>>7, 0, (1<<15)-1); // the cubic equation does overflow ...
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2492 // dst[i] = val>>7;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2493 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2494 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2495 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2496 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2497 // *** horizontal scale Y line to temp buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2498 static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2499 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2500 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2501 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2502 int32_t *mmx2FilterPos, uint8_t *pal)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2503 {
20411
208c6a5b8665 16-bit grayscale support
kostya
parents: 20094
diff changeset
2504 if(srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2505 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2506 RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2507 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2508 }
20411
208c6a5b8665 16-bit grayscale support
kostya
parents: 20094
diff changeset
2509 else if(srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2510 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2511 RENAME(uyvyToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2512 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2513 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2514 else if(srcFormat==PIX_FMT_RGB32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2515 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2516 RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2517 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2518 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2519 else if(srcFormat==PIX_FMT_BGR24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2520 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2521 RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2522 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2523 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2524 else if(srcFormat==PIX_FMT_BGR565)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2525 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2526 RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2527 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2528 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2529 else if(srcFormat==PIX_FMT_BGR555)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2530 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2531 RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2532 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2533 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2534 else if(srcFormat==PIX_FMT_BGR32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2535 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2536 RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2537 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2538 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2539 else if(srcFormat==PIX_FMT_RGB24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2540 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2541 RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2542 src= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2543 }
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2544 else if(srcFormat==PIX_FMT_RGB565)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2545 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2546 RENAME(rgb16ToY)(formatConvBuffer, src, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2547 src= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2548 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2549 else if(srcFormat==PIX_FMT_RGB555)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2550 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2551 RENAME(rgb15ToY)(formatConvBuffer, src, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2552 src= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2553 }
22226
1b1286436771 BGR/RGB4 byte formats as input
michael
parents: 22218
diff changeset
2554 else if(srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2555 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2556 RENAME(palToY)(formatConvBuffer, src, srcW, pal);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2557 src= formatConvBuffer;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2558 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2559
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2560 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2561 // use the new MMX scaler if the mmx2 can't be used (its faster than the x86asm one)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2562 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2563 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2564 if(!(flags&SWS_FAST_BILINEAR))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2565 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2566 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2567 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2568 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2569 else // Fast Bilinear upscale / crap downscale
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2570 {
20576
9e7c80f126d6 Use common define for x86_32 and x86_64.
diego
parents: 20411
diff changeset
2571 #if defined(ARCH_X86)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2572 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2573 int i;
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2574 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2575 uint64_t ebxsave __attribute__((aligned(8)));
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2576 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2577 if(canMMX2BeUsed)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2578 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2579 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2580 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2581 "mov %%"REG_b", %5 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2582 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2583 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2584 "mov %0, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2585 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2586 "mov %2, %%"REG_d" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2587 "mov %3, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2588 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2589 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2590 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2591 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2592
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2593 #ifdef ARCH_X86_64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2594
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2595 #define FUNNY_Y_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2596 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2597 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2598 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2599 "add %%"REG_S", %%"REG_c" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2600 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2601 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2602
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2603 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2604
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2605 #define FUNNY_Y_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2606 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2607 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2608 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2609 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2610 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2611
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2612 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2613
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2614 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2615 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2616 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2617 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2618 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2619 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2620 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2621 FUNNY_Y_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2622
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2623 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2624 "mov %5, %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2625 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2626 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2627 "m" (funnyYCode)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2628 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2629 ,"m" (ebxsave)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2630 #endif
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2631 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2632 #if !defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2633 ,"%"REG_b
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2634 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2635 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2636 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2637 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2638 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2639 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2640 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2641 long xInc_shr16 = xInc >> 16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2642 uint16_t xInc_mask = xInc & 0xffff;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2643 //NO MMX just normal asm ...
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2644 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2645 "xor %%"REG_a", %%"REG_a" \n\t" // i
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2646 "xor %%"REG_d", %%"REG_d" \n\t" // xx
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2647 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2648 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2649 "1: \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2650 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2651 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2652 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2653 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2654 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2655 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2656 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2657 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2658 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2659 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2660 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2661
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2662 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2663 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2664 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2665 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2666 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2667 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2668 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2669 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2670 "movw %%si, 2(%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2671 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2672 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2673
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2674
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2675 "add $2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2676 "cmp %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2677 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2678
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2679
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2680 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2681 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2682 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2683 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2684 } //if MMX2 can't be used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2685 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2686 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2687 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2688 unsigned int xpos=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2689 for(i=0;i<dstWidth;i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2690 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2691 register unsigned int xx=xpos>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2692 register unsigned int xalpha=(xpos&0xFFFF)>>9;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2693 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2694 xpos+=xInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2695 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2696 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2697 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2698 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2699
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2700 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2701 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2702 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2703 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2704 int32_t *mmx2FilterPos, uint8_t *pal)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2705 {
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2706 if(srcFormat==PIX_FMT_YUYV422)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2707 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2708 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2709 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2710 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2711 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2712 else if(srcFormat==PIX_FMT_UYVY422)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2713 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2714 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2715 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2716 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2717 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2718 else if(srcFormat==PIX_FMT_RGB32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2719 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2720 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2721 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2722 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2723 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2724 else if(srcFormat==PIX_FMT_BGR24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2725 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2726 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2727 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2728 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2729 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2730 else if(srcFormat==PIX_FMT_BGR565)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2731 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2732 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2733 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2734 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2735 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2736 else if(srcFormat==PIX_FMT_BGR555)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2737 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2738 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2739 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2740 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2741 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2742 else if(srcFormat==PIX_FMT_BGR32)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2743 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2744 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2745 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2746 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2747 }
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
2748 else if(srcFormat==PIX_FMT_RGB24)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2749 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2750 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2751 src1= formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2752 src2= formatConvBuffer+2048;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2753 }
20589
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2754 else if(srcFormat==PIX_FMT_RGB565)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2755 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2756 RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2757 src1= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2758 src2= formatConvBuffer+2048;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2759 }
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2760 else if(srcFormat==PIX_FMT_RGB555)
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2761 {
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2762 RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2763 src1= formatConvBuffer;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2764 src2= formatConvBuffer+2048;
95695bfce2f0 Add support for conversions from the rgb565 and rgb555 formats
lucabe
parents: 20576
diff changeset
2765 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2766 else if(isGray(srcFormat))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2767 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2768 return;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2769 }
22226
1b1286436771 BGR/RGB4 byte formats as input
michael
parents: 22218
diff changeset
2770 else if(srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2771 {
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2772 RENAME(palToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW, pal);
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2773 src1= formatConvBuffer;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2774 src2= formatConvBuffer+2048;
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2775 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2776
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2777 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2778 // use the new MMX scaler if the mmx2 can't be used (it's faster than the x86asm one)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2779 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2780 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2781 if(!(flags&SWS_FAST_BILINEAR))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2782 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2783 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2784 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2785 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2786 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2787 else // Fast Bilinear upscale / crap downscale
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2788 {
20576
9e7c80f126d6 Use common define for x86_32 and x86_64.
diego
parents: 20411
diff changeset
2789 #if defined(ARCH_X86)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2790 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2791 int i;
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2792 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2793 uint64_t ebxsave __attribute__((aligned(8)));
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2794 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2795 if(canMMX2BeUsed)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2796 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2797 asm volatile(
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2798 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2799 "mov %%"REG_b", %6 \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2800 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2801 "pxor %%mm7, %%mm7 \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2802 "mov %0, %%"REG_c" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2803 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2804 "mov %2, %%"REG_d" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2805 "mov %3, %%"REG_b" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2806 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2807 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2808 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2809 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2810
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2811 #ifdef ARCH_X86_64
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2812
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2813 #define FUNNY_UV_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2814 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2815 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2816 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2817 "add %%"REG_S", %%"REG_c" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2818 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2819 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2820
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2821 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2822
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2823 #define FUNNY_UV_CODE \
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2824 "movl (%%"REG_b"), %%esi \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2825 "call *%4 \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2826 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2827 "add %%"REG_a", %%"REG_D" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2828 "xor %%"REG_a", %%"REG_a" \n\t"\
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2829
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2830 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2831
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2832 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2833 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2834 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2835 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2836 "xor %%"REG_a", %%"REG_a" \n\t" // i
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2837 "mov %5, %%"REG_c" \n\t" // src
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2838 "mov %1, %%"REG_D" \n\t" // buf1
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2839 "add $4096, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2840 PREFETCH" (%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2841 PREFETCH" 32(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2842 PREFETCH" 64(%%"REG_c") \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2843
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2844 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2845 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2846 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2847 FUNNY_UV_CODE
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2848
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2849 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2850 "mov %6, %%"REG_b" \n\t"
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2851 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2852 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2853 "m" (funnyUVCode), "m" (src2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2854 #if defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2855 ,"m" (ebxsave)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2856 #endif
19400
0310c3310360 Fix compilation with -no-PIC and without -fomit-frame-pointer (used by
uau
parents: 19396
diff changeset
2857 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2858 #if !defined(PIC)
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2859 ,"%"REG_b
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2860 #endif
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2861 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2862 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2863 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2864 // printf("%d %d %d\n", dstWidth, i, srcW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2865 dst[i] = src1[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2866 dst[i+2048] = src2[srcW-1]*128;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2867 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2868 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2869 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2870 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2871 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2872 long xInc_shr16 = (long) (xInc >> 16);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2873 uint16_t xInc_mask = xInc & 0xffff;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2874 asm volatile(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2875 "xor %%"REG_a", %%"REG_a" \n\t" // i
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2876 "xor %%"REG_d", %%"REG_d" \n\t" // xx
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2877 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 19181
diff changeset
2878 ASMALIGN(4)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2879 "1: \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2880 "mov %0, %%"REG_S" \n\t"
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2881 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2882 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2883 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2884 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2885 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2886 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2887 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2888 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2889 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2890
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2891 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2892 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2893 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2894 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2895 "shll $16, %%edi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2896 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2897 "mov %1, %%"REG_D" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2898 "shrl $9, %%esi \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2899 "movw %%si, 4096(%%"REG_D", %%"REG_a", 2)\n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2900
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2901 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2902 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2903 "add $1, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2904 "cmp %2, %%"REG_a" \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2905 " jb 1b \n\t"
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2906
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2907 /* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2908 which is needed to support GCC-4.0 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2909 #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2910 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2911 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2912 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2913 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2914 "r" (src2)
19396
8fe37c66d10a -fPIC support for libswscale
diego
parents: 19372
diff changeset
2915 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2916 );
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2917 #ifdef HAVE_MMX2
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2918 } //if MMX2 can't be used
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2919 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2920 #else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2921 int i;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2922 unsigned int xpos=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2923 for(i=0;i<dstWidth;i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2924 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2925 register unsigned int xx=xpos>>16;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2926 register unsigned int xalpha=(xpos&0xFFFF)>>9;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2927 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2928 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2929 /* slower
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2930 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2931 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2932 */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2933 xpos+=xInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2934 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2935 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2936 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2937 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2938
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2939 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2940 int srcSliceH, uint8_t* dst[], int dstStride[]){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2941
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2942 /* load a few things into local vars to make the code more readable? and faster */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2943 const int srcW= c->srcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2944 const int dstW= c->dstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2945 const int dstH= c->dstH;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2946 const int chrDstW= c->chrDstW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2947 const int chrSrcW= c->chrSrcW;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2948 const int lumXInc= c->lumXInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2949 const int chrXInc= c->chrXInc;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2950 const int dstFormat= c->dstFormat;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2951 const int srcFormat= c->srcFormat;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2952 const int flags= c->flags;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2953 const int canMMX2BeUsed= c->canMMX2BeUsed;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2954 int16_t *vLumFilterPos= c->vLumFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2955 int16_t *vChrFilterPos= c->vChrFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2956 int16_t *hLumFilterPos= c->hLumFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2957 int16_t *hChrFilterPos= c->hChrFilterPos;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2958 int16_t *vLumFilter= c->vLumFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2959 int16_t *vChrFilter= c->vChrFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2960 int16_t *hLumFilter= c->hLumFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2961 int16_t *hChrFilter= c->hChrFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2962 int32_t *lumMmxFilter= c->lumMmxFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2963 int32_t *chrMmxFilter= c->chrMmxFilter;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2964 const int vLumFilterSize= c->vLumFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2965 const int vChrFilterSize= c->vChrFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2966 const int hLumFilterSize= c->hLumFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2967 const int hChrFilterSize= c->hChrFilterSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2968 int16_t **lumPixBuf= c->lumPixBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2969 int16_t **chrPixBuf= c->chrPixBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2970 const int vLumBufSize= c->vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2971 const int vChrBufSize= c->vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2972 uint8_t *funnyYCode= c->funnyYCode;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2973 uint8_t *funnyUVCode= c->funnyUVCode;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2974 uint8_t *formatConvBuffer= c->formatConvBuffer;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2975 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2976 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2977 int lastDstY;
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2978 uint8_t *pal=NULL;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2979
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2980 /* vars which will change and which we need to store back in the context */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2981 int dstY= c->dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2982 int lumBufIndex= c->lumBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2983 int chrBufIndex= c->chrBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2984 int lastInLumBuf= c->lastInLumBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2985 int lastInChrBuf= c->lastInChrBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2986
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2987 if(isPacked(c->srcFormat)){
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
2988 pal= src[1];
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2989 src[0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2990 src[1]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2991 src[2]= src[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2992 srcStride[0]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2993 srcStride[1]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2994 srcStride[2]= srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2995 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2996 srcStride[1]<<= c->vChrDrop;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2997 srcStride[2]<<= c->vChrDrop;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2998
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
2999 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3000 // (int)dst[0], (int)dst[1], (int)dst[2]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3001
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3002 #if 0 //self test FIXME move to a vfilter or something
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3003 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3004 static volatile int i=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3005 i++;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
3006 if(srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH)
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3007 selfTest(src, srcStride, c->srcW, c->srcH);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3008 i--;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3009 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3010 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3011
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3012 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3013 //dstStride[0],dstStride[1],dstStride[2]);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3014
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3015 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3016 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3017 static int firstTime=1; //FIXME move this into the context perhaps
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3018 if(flags & SWS_PRINT_INFO && firstTime)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3019 {
21981
a14ddab18acd Pass a context to av_log(), when possible
lucabe
parents: 21760
diff changeset
3020 av_log(c, AV_LOG_WARNING, "SwScaler: Warning: dstStride is not aligned!\n"
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3021 "SwScaler: ->cannot do aligned memory acesses anymore\n");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3022 firstTime=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3023 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3024 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3025
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3026 /* Note: the user might start scaling the picture in the middle, in which case this will not get executed;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3027 this is not really intended but works currently, so people might do it */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3028 if(srcSliceY ==0){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3029 lumBufIndex=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3030 chrBufIndex=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3031 dstY=0;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3032 lastInLumBuf= -1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3033 lastInChrBuf= -1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3034 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3035
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3036 lastDstY= dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3037
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3038 for(;dstY < dstH; dstY++){
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3039 unsigned char *dest =dst[0]+dstStride[0]*dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3040 const int chrDstY= dstY>>c->chrDstVSubSample;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3041 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3042 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3043
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3044 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3045 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3046 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3047 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3048
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3049 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3050 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3051 //handle holes (FAST_BILINEAR & weird filters)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3052 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3053 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3054 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3055 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3056 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3057
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3058 // Do we have enough lines in this slice to output the dstY line
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3059 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3060 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3061 //Do horizontal scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3062 while(lastInLumBuf < lastLumSrcY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3063 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3064 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3065 lumBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3066 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3067 ASSERT(lumBufIndex < 2*vLumBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3068 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3069 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3070 // printf("%d %d\n", lumBufIndex, vLumBufSize);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3071 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3072 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3073 funnyYCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3074 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3075 lastInLumBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3076 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3077 while(lastInChrBuf < lastChrSrcY)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3078 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3079 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3080 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3081 chrBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3082 ASSERT(chrBufIndex < 2*vChrBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3083 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3084 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3085 //FIXME replace parameters through context struct (some at least)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3086
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3087 if(!(isGray(srcFormat) || isGray(dstFormat)))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3088 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3089 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3090 funnyUVCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3091 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3092 lastInChrBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3093 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3094 //wrap buf index around to stay inside the ring buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3095 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3096 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3097 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3098 else // not enough lines left in this slice -> load the rest in the buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3099 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3100 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3101 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3102 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3103 vChrBufSize, vLumBufSize);*/
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3104
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3105 //Do horizontal scaling
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3106 while(lastInLumBuf+1 < srcSliceY + srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3107 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3108 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3109 lumBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3110 ASSERT(lumBufIndex < 2*vLumBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3111 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3112 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3113 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3114 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3115 funnyYCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3116 c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3117 lastInLumBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3118 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3119 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3120 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3121 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3122 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3123 chrBufIndex++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3124 ASSERT(chrBufIndex < 2*vChrBufSize)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3125 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3126 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3127
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3128 if(!(isGray(srcFormat) || isGray(dstFormat)))
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3129 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3130 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3131 funnyUVCode, c->srcFormat, formatConvBuffer,
22218
ff7aa2aecb9d pal8 input
michael
parents: 21981
diff changeset
3132 c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3133 lastInChrBuf++;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3134 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3135 //wrap buf index around to stay inside the ring buffer
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3136 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3137 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3138 break; //we can't output a dstY line so let's try with the next slice
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3139 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3140
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3141 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3142 b5Dither= dither8[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3143 g6Dither= dither4[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3144 g5Dither= dither8[dstY&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3145 r5Dither= dither8[(dstY+1)&1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3146 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3147 if(dstY < dstH-2)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3148 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3149 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3150 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3151 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3152 int i;
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3153 if(flags & SWS_ACCURATE_RND){
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3154 for(i=0; i<vLumFilterSize; i+=2){
21756
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3155 lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ];
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3156 lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3157 lumMmxFilter[2*i+2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3158 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3159 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3160 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3161 for(i=0; i<vChrFilterSize; i+=2){
21756
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3162 chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ];
b41f4217d57a Add some explicit casts to avoid some warnings
lucabe
parents: 21686
diff changeset
3163 chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3164 chrMmxFilter[2*i+2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3165 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3166 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3167 }
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3168 }else{
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3169 for(i=0; i<vLumFilterSize; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3170 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3171 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
22383
508e55817748 Fix a possible crash on 64 bit systems when the lumSrcPtr or chrSrcPtr
reimar
parents: 22321
diff changeset
3172 lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3173 lumMmxFilter[4*i+2]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3174 lumMmxFilter[4*i+3]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3175 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3176 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3177 for(i=0; i<vChrFilterSize; i++)
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3178 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3179 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
22383
508e55817748 Fix a possible crash on 64 bit systems when the lumSrcPtr or chrSrcPtr
reimar
parents: 22321
diff changeset
3180 chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3181 chrMmxFilter[4*i+2]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3182 chrMmxFilter[4*i+3]=
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3183 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3184 }
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3185 }
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3186 #endif
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
3187 if(dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3188 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3189 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3190 RENAME(yuv2nv12X)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3191 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3192 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3193 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3194 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3195 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3196 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3197 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3198 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3199 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3200 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3201 int16_t *lumBuf = lumPixBuf[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3202 int16_t *chrBuf= chrPixBuf[0];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3203 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3204 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3205 else //General YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3206 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3207 RENAME(yuv2yuvX)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3208 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3209 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3210 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3211 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3212 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3213 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3214 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3215 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3216 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3217 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3218 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3219 int chrAlpha= vChrFilter[2*dstY+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3220 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3221 dest, dstW, chrAlpha, dstFormat, flags, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3222 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3223 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3224 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3225 int lumAlpha= vLumFilter[2*dstY+1];
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3226 int chrAlpha= vChrFilter[2*dstY+1];
19172
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3227 lumMmxFilter[2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3228 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3229 chrMmxFilter[2]=
bae6c99a99cc vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents: 18861
diff changeset
3230 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3231 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3232 dest, dstW, lumAlpha, chrAlpha, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3233 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3234 else //General RGB
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3235 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3236 RENAME(yuv2packedX)(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3237 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3238 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3239 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3240 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3241 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3242 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3243 else // hmm looks like we can't use MMX here without overwriting this array's tail
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3244 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3245 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3246 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
19872
8e50cba9fe03 Remove the dependency of libswscale on img_format.h
lucabe
parents: 19594
diff changeset
3247 if(dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
18861
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3248 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3249 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3250 yuv2nv12XinC(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3251 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3252 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3253 dest, uDest, dstW, chrDstW, dstFormat);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3254 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3255 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3256 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3257 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3258 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3259 yuv2yuvXinC(
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3260 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3261 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3262 dest, uDest, vDest, dstW, chrDstW);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3263 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3264 else
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3265 {
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3266 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3267 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3268 yuv2packedXinC(c,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3269 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3270 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3271 dest, dstW, dstY);
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3272 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3273 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3274 }
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3275
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3276 #ifdef HAVE_MMX
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3277 __asm __volatile(SFENCE:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3278 __asm __volatile(EMMS:::"memory");
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3279 #endif
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3280 /* store changed local vars back in the context */
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3281 c->dstY= dstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3282 c->lumBufIndex= lumBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3283 c->chrBufIndex= chrBufIndex;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3284 c->lastInLumBuf= lastInLumBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3285 c->lastInChrBuf= lastInChrBuf;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3286
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3287 return dstY - lastDstY;
8579acff875e Move postproc ---> libswscale
lucabe
parents:
diff changeset
3288 }