mplayer.hg: annotate libswscale/swscale_template.c @ 19619:a83e5b8d2e63
Patch from Karolina Lindqvist <karolina.lindqvist@kramnet.se>:
"There is a bug in the zoran -vo zr driver that always garbles the output.
It also probably affects the zrmjpeg filter. This patch takes care of the
problem."
Patch tested and OK. And 10l to me, because this bug has probably existed for
a looong time.
author    rik
date      Fri, 01 Sep 2006 18:49:40 +0000
parents   4678e9f81334
children  8e50cba9fe03
/*
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    the C code (not assembly, mmx, ...) of the swscaler which has been written
    by Michael Niedermayer can be used under the LGPL license too
*/

#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW
#undef EMMS
#undef SFENCE

#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

#ifdef HAVE_3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
#define PREFETCH "/nop"
#define PREFETCHW "/nop"
#endif

#ifdef HAVE_MMX2
#define SFENCE "sfence"
#else
#define SFENCE "/nop"
#endif

#ifdef HAVE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif defined (HAVE_3DNOW)
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif

#ifdef HAVE_MMX2
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
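/* Note: MOVNTQ wraps REAL_MOVNTQ so that macro arguments such as %%REGa are
   expanded first and only then stringized by #a/#b inside REAL_MOVNTQ;
   stringizing directly would paste the unexpanded tokens into the asm text. */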

#ifdef HAVE_ALTIVEC
#include "swscale_altivec_template.c"
#endif

#define YSCALEYUV2YV12X(x, offset, dest, width) \
asm volatile(\
    "xor %%"REG_a", %%"REG_a"        \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
    "movq %%mm3, %%mm4               \n\t"\
    "lea " offset "(%0), %%"REG_d"   \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1:                              \n\t"\
    "movq 8(%%"REG_d"), %%mm0        \n\t" /* filterCoeff */\
    "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
    "add $16, %%"REG_d"              \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "test %%"REG_S", %%"REG_S"       \n\t"\
    "pmulhw %%mm0, %%mm2             \n\t"\
    "pmulhw %%mm0, %%mm5             \n\t"\
    "paddw %%mm2, %%mm3              \n\t"\
    "paddw %%mm5, %%mm4              \n\t"\
    " jnz 1b                         \n\t"\
    "psraw $3, %%mm3                 \n\t"\
    "psraw $3, %%mm4                 \n\t"\
    "packuswb %%mm4, %%mm3           \n\t"\
    MOVNTQ(%%mm3, (%1, %%REGa))\
    "add $8, %%"REG_a"               \n\t"\
    "cmp %2, %%"REG_a"               \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
    "movq %%mm3, %%mm4               \n\t"\
    "lea " offset "(%0), %%"REG_d"   \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "jb 1b                           \n\t"\
    :: "r" (&c->redDither),\
       "r" (dest), "p" (width)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );
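#if 0
/* Plain-C sketch of what YSCALEYUV2YV12X computes, for reference only
   (assumption: this is not the project's actual C fallback). The filter is
   a NULL-terminated list of {srcPtr, coeff} pairs walked by the test/jnz
   above; pmulhw yields (a*b)>>16 and packuswb clamps to 0..255. */
static inline void yscaleyuv2yv12x_ref(int16_t **src, const int16_t *coeff,
                                       int n, int rounder, /* VROUNDER word */
                                       uint8_t *dest, long width)
{
    for (long i = 0; i < width; i++) {
        int val = rounder;                         /* movq VROUNDER, %%mm3 */
        for (int j = 0; j < n; j++)
            val += (src[j][i] * coeff[j]) >> 16;   /* pmulhw + paddw */
        val >>= 3;                                 /* psraw $3 */
        dest[i] = val < 0 ? 0 : (val > 255 ? 255 : val); /* packuswb */
    }
}
#endif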

#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
asm volatile(\
    "lea " offset "(%0), %%"REG_d"   \n\t"\
    "xor %%"REG_a", %%"REG_a"        \n\t"\
    "pxor %%mm4, %%mm4               \n\t"\
    "pxor %%mm5, %%mm5               \n\t"\
    "pxor %%mm6, %%mm6               \n\t"\
    "pxor %%mm7, %%mm7               \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    ASMALIGN(4) \
    "1:                              \n\t"\
    "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
    "mov 4(%%"REG_d"), %%"REG_S"     \n\t"\
    "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
    "movq %%mm0, %%mm3               \n\t"\
    "punpcklwd %%mm1, %%mm0          \n\t"\
    "punpckhwd %%mm1, %%mm3          \n\t"\
    "movq 8(%%"REG_d"), %%mm1        \n\t" /* filterCoeff */\
    "pmaddwd %%mm1, %%mm0            \n\t"\
    "pmaddwd %%mm1, %%mm3            \n\t"\
    "paddd %%mm0, %%mm4              \n\t"\
    "paddd %%mm3, %%mm5              \n\t"\
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
    "mov 16(%%"REG_d"), %%"REG_S"    \n\t"\
    "add $16, %%"REG_d"              \n\t"\
    "test %%"REG_S", %%"REG_S"       \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "punpcklwd %%mm3, %%mm2          \n\t"\
    "punpckhwd %%mm3, %%mm0          \n\t"\
    "pmaddwd %%mm1, %%mm2            \n\t"\
    "pmaddwd %%mm1, %%mm0            \n\t"\
    "paddd %%mm2, %%mm6              \n\t"\
    "paddd %%mm0, %%mm7              \n\t"\
    " jnz 1b                         \n\t"\
    "psrad $16, %%mm4                \n\t"\
    "psrad $16, %%mm5                \n\t"\
    "psrad $16, %%mm6                \n\t"\
    "psrad $16, %%mm7                \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
    "packssdw %%mm5, %%mm4           \n\t"\
    "packssdw %%mm7, %%mm6           \n\t"\
    "paddw %%mm0, %%mm4              \n\t"\
    "paddw %%mm0, %%mm6              \n\t"\
    "psraw $3, %%mm4                 \n\t"\
    "psraw $3, %%mm6                 \n\t"\
    "packuswb %%mm6, %%mm4           \n\t"\
    MOVNTQ(%%mm4, (%1, %%REGa))\
    "add $8, %%"REG_a"               \n\t"\
    "cmp %2, %%"REG_a"               \n\t"\
    "lea " offset "(%0), %%"REG_d"   \n\t"\
    "pxor %%mm4, %%mm4               \n\t"\
    "pxor %%mm5, %%mm5               \n\t"\
    "pxor %%mm6, %%mm6               \n\t"\
    "pxor %%mm7, %%mm7               \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "jb 1b                           \n\t"\
    :: "r" (&c->redDither),\
       "r" (dest), "p" (width)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );
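/* Note: unlike YSCALEYUV2YV12X above, the _ACCURATE variant widens to 32 bit
   with punpck[lh]wd + pmaddwd and accumulates with paddd before rounding,
   which is presumably what fixes the +-1 errors mentioned in r19172. */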

#define YSCALEYUV2YV121 \
    "mov %2, %%"REG_a"               \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1:                              \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0  \n\t"\
    "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
    "psraw $7, %%mm0                 \n\t"\
    "psraw $7, %%mm1                 \n\t"\
    "packuswb %%mm1, %%mm0           \n\t"\
    MOVNTQ(%%mm0, (%1, %%REGa))\
    "add $8, %%"REG_a"               \n\t"\
    "jnc 1b                          \n\t"

/*
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
       "r" (dest), "m" (dstW),
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
#define YSCALEYUV2PACKEDX \
asm volatile(\
    "xor %%"REG_a", %%"REG_a"        \n\t"\
    ASMALIGN(4)\
    "nop                             \n\t"\
    "1:                              \n\t"\
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
    "movq %%mm3, %%mm4               \n\t"\
    ASMALIGN(4)\
    "2:                              \n\t"\
    "movq 8(%%"REG_d"), %%mm0        \n\t" /* filterCoeff */\
    "movq (%%"REG_S", %%"REG_a"), %%mm2     \n\t" /* UsrcData */\
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
    "add $16, %%"REG_d"              \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "pmulhw %%mm0, %%mm2             \n\t"\
    "pmulhw %%mm0, %%mm5             \n\t"\
    "paddw %%mm2, %%mm3              \n\t"\
    "paddw %%mm5, %%mm4              \n\t"\
    "test %%"REG_S", %%"REG_S"       \n\t"\
    " jnz 2b                         \n\t"\
    \
    "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
    "movq %%mm1, %%mm7               \n\t"\
    ASMALIGN(4)\
    "2:                              \n\t"\
    "movq 8(%%"REG_d"), %%mm0        \n\t" /* filterCoeff */\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm2  \n\t" /* Y1srcData */\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
    "add $16, %%"REG_d"              \n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "pmulhw %%mm0, %%mm2             \n\t"\
    "pmulhw %%mm0, %%mm5             \n\t"\
    "paddw %%mm2, %%mm1              \n\t"\
    "paddw %%mm5, %%mm7              \n\t"\
    "test %%"REG_S", %%"REG_S"       \n\t"\
    " jnz 2b                         \n\t"\

#define YSCALEYUV2PACKEDX_END\
    :: "r" (&c->redDither), \
       "m" (dummy), "m" (dummy), "m" (dummy),\
       "r" (dest), "m" (dstW)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );

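/* Usage sketch (an assumption about callers outside this excerpt): the head
   and tail are split so a convert + store sequence can be pasted between
   them, e.g.
       YSCALEYUV2PACKEDX
       YSCALEYUV2RGBX
       WRITEBGR32(%4, %5, %%REGa)
       YSCALEYUV2PACKEDX_END
   where the %4/%5 operand numbers depend on the enclosing function. */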
#define YSCALEYUV2PACKEDX_ACCURATE \
asm volatile(\
    "xor %%"REG_a", %%"REG_a"        \n\t"\
    ASMALIGN(4)\
    "nop                             \n\t"\
    "1:                              \n\t"\
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "pxor %%mm4, %%mm4               \n\t"\
    "pxor %%mm5, %%mm5               \n\t"\
    "pxor %%mm6, %%mm6               \n\t"\
    "pxor %%mm7, %%mm7               \n\t"\
    ASMALIGN(4)\
    "2:                              \n\t"\
    "movq (%%"REG_S", %%"REG_a"), %%mm0     \n\t" /* UsrcData */\
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
    "mov 4(%%"REG_d"), %%"REG_S"     \n\t"\
    "movq (%%"REG_S", %%"REG_a"), %%mm1     \n\t" /* UsrcData */\
    "movq %%mm0, %%mm3               \n\t"\
    "punpcklwd %%mm1, %%mm0          \n\t"\
    "punpckhwd %%mm1, %%mm3          \n\t"\
    "movq 8(%%"REG_d"), %%mm1        \n\t" /* filterCoeff */\
    "pmaddwd %%mm1, %%mm0            \n\t"\
    "pmaddwd %%mm1, %%mm3            \n\t"\
    "paddd %%mm0, %%mm4              \n\t"\
    "paddd %%mm3, %%mm5              \n\t"\
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
    "mov 16(%%"REG_d"), %%"REG_S"    \n\t"\
    "add $16, %%"REG_d"              \n\t"\
    "test %%"REG_S", %%"REG_S"       \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "punpcklwd %%mm3, %%mm2          \n\t"\
    "punpckhwd %%mm3, %%mm0          \n\t"\
    "pmaddwd %%mm1, %%mm2            \n\t"\
    "pmaddwd %%mm1, %%mm0            \n\t"\
    "paddd %%mm2, %%mm6              \n\t"\
    "paddd %%mm0, %%mm7              \n\t"\
    " jnz 2b                         \n\t"\
    "psrad $16, %%mm4                \n\t"\
    "psrad $16, %%mm5                \n\t"\
    "psrad $16, %%mm6                \n\t"\
    "psrad $16, %%mm7                \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
    "packssdw %%mm5, %%mm4           \n\t"\
    "packssdw %%mm7, %%mm6           \n\t"\
    "paddw %%mm0, %%mm4              \n\t"\
    "paddw %%mm0, %%mm6              \n\t"\
    "movq %%mm4, "U_TEMP"(%0)        \n\t"\
    "movq %%mm6, "V_TEMP"(%0)        \n\t"\
    \
    "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
    "mov (%%"REG_d"), %%"REG_S"      \n\t"\
    "pxor %%mm1, %%mm1               \n\t"\
    "pxor %%mm5, %%mm5               \n\t"\
    "pxor %%mm7, %%mm7               \n\t"\
    "pxor %%mm6, %%mm6               \n\t"\
    ASMALIGN(4)\
    "2:                              \n\t"\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm0  \n\t" /* Y1srcData */\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
    "mov 4(%%"REG_d"), %%"REG_S"     \n\t"\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm4  \n\t" /* Y1srcData */\
    "movq %%mm0, %%mm3               \n\t"\
    "punpcklwd %%mm4, %%mm0          \n\t"\
    "punpckhwd %%mm4, %%mm3          \n\t"\
    "movq 8(%%"REG_d"), %%mm4        \n\t" /* filterCoeff */\
    "pmaddwd %%mm4, %%mm0            \n\t"\
    "pmaddwd %%mm4, %%mm3            \n\t"\
    "paddd %%mm0, %%mm1              \n\t"\
    "paddd %%mm3, %%mm5              \n\t"\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
    "mov 16(%%"REG_d"), %%"REG_S"    \n\t"\
    "add $16, %%"REG_d"              \n\t"\
    "test %%"REG_S", %%"REG_S"       \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "punpcklwd %%mm3, %%mm2          \n\t"\
    "punpckhwd %%mm3, %%mm0          \n\t"\
    "pmaddwd %%mm4, %%mm2            \n\t"\
    "pmaddwd %%mm4, %%mm0            \n\t"\
    "paddd %%mm2, %%mm7              \n\t"\
    "paddd %%mm0, %%mm6              \n\t"\
    " jnz 2b                         \n\t"\
    "psrad $16, %%mm1                \n\t"\
    "psrad $16, %%mm5                \n\t"\
    "psrad $16, %%mm7                \n\t"\
    "psrad $16, %%mm6                \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
    "packssdw %%mm5, %%mm1           \n\t"\
    "packssdw %%mm6, %%mm7           \n\t"\
    "paddw %%mm0, %%mm1              \n\t"\
    "paddw %%mm0, %%mm7              \n\t"\
    "movq "U_TEMP"(%0), %%mm3        \n\t"\
    "movq "V_TEMP"(%0), %%mm4        \n\t"\

#define YSCALEYUV2RGBX \
    "psubw "U_OFFSET"(%0), %%mm3     \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"(%0), %%mm4     \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2               \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5               \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"(%0), %%mm3    \n\t"\
    "pmulhw "VG_COEFF"(%0), %%mm4    \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "pmulhw "UB_COEFF"(%0), %%mm2    \n\t"\
    "pmulhw "VR_COEFF"(%0), %%mm5    \n\t"\
    "psubw "Y_OFFSET"(%0), %%mm1     \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"(%0), %%mm7     \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"(%0), %%mm1     \n\t"\
    "pmulhw "Y_COEFF"(%0), %%mm7     \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4              \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "movq %%mm5, %%mm6               \n\t"\
    "movq %%mm4, %%mm3               \n\t"\
    "punpcklwd %%mm2, %%mm2          \n\t"\
    "punpcklwd %%mm5, %%mm5          \n\t"\
    "punpcklwd %%mm4, %%mm4          \n\t"\
    "paddw %%mm1, %%mm2              \n\t"\
    "paddw %%mm1, %%mm5              \n\t"\
    "paddw %%mm1, %%mm4              \n\t"\
    "punpckhwd %%mm0, %%mm0          \n\t"\
    "punpckhwd %%mm6, %%mm6          \n\t"\
    "punpckhwd %%mm3, %%mm3          \n\t"\
    "paddw %%mm7, %%mm0              \n\t"\
    "paddw %%mm7, %%mm6              \n\t"\
    "paddw %%mm7, %%mm3              \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2           \n\t"\
    "packuswb %%mm6, %%mm5           \n\t"\
    "packuswb %%mm3, %%mm4           \n\t"\
    "pxor %%mm7, %%mm7               \n\t"
#if 0
#define FULL_YSCALEYUV2RGB \
    "pxor %%mm7, %%mm7               \n\t"\
    "movd %6, %%mm6                  \n\t" /*yalpha1*/\
    "punpcklwd %%mm6, %%mm6          \n\t"\
    "punpcklwd %%mm6, %%mm6          \n\t"\
    "movd %7, %%mm5                  \n\t" /*uvalpha1*/\
    "punpcklwd %%mm5, %%mm5          \n\t"\
    "punpcklwd %%mm5, %%mm5          \n\t"\
    "xor %%"REG_a", %%"REG_a"        \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0  \n\t" /*buf0[eax]*/\
    "movq (%1, %%"REG_a", 2), %%mm1  \n\t" /*buf1[eax]*/\
    "movq (%2, %%"REG_a",2), %%mm2   \n\t" /* uvbuf0[eax]*/\
    "movq (%3, %%"REG_a",2), %%mm3   \n\t" /* uvbuf1[eax]*/\
    "psubw %%mm1, %%mm0              \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm3, %%mm2              \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "pmulhw %%mm6, %%mm0             \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw %%mm5, %%mm2             \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "psraw $4, %%mm1                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
    "psraw $4, %%mm3                 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "paddw %%mm0, %%mm1              \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
    "paddw %%mm2, %%mm3              \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
    "psubw %%mm0, %%mm4              \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "psubw "MANGLE(w80)", %%mm1      \n\t" /* 8(Y-16)*/\
    "psubw "MANGLE(w400)", %%mm3     \n\t" /* 8(U-128)*/\
    "pmulhw "MANGLE(yCoeff)", %%mm1  \n\t"\
    \
    \
    "pmulhw %%mm5, %%mm4             \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
    "movq %%mm3, %%mm2               \n\t" /* (U-128)8*/\
    "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\
    "psraw $4, %%mm0                 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\
    "paddw %%mm4, %%mm0              \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
    "psubw "MANGLE(w400)", %%mm0     \n\t" /* (V-128)8*/\
    \
    \
    "movq %%mm0, %%mm4               \n\t" /* (V-128)8*/\
    "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\
    "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\
    "paddw %%mm1, %%mm3              \n\t" /* B*/\
    "paddw %%mm1, %%mm0              \n\t" /* R*/\
    "packuswb %%mm3, %%mm3           \n\t"\
    \
    "packuswb %%mm0, %%mm0           \n\t"\
    "paddw %%mm4, %%mm2              \n\t"\
    "paddw %%mm2, %%mm1              \n\t" /* G*/\
    \
    "packuswb %%mm1, %%mm1           \n\t"
#endif

#define REAL_YSCALEYUV2PACKED(index, c) \
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
    "psraw $3, %%mm0                 \n\t"\
    "psraw $3, %%mm1                 \n\t"\
    "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
    "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
    "xor "#index", "#index"          \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%2, "#index"), %%mm2      \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3      \n\t" /* uvbuf1[eax]*/\
    "movq 4096(%2, "#index"), %%mm5  \n\t" /* uvbuf0[eax+2048]*/\
    "movq 4096(%3, "#index"), %%mm4  \n\t" /* uvbuf1[eax+2048]*/\
    "psubw %%mm3, %%mm2              \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw %%mm4, %%mm5              \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
    "pmulhw %%mm0, %%mm2             \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "pmulhw %%mm0, %%mm5             \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
    "psraw $7, %%mm3                 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "psraw $7, %%mm4                 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "paddw %%mm2, %%mm3              \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
    "paddw %%mm5, %%mm4              \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
    "movq (%0, "#index", 2), %%mm0   \n\t" /*buf0[eax]*/\
    "movq (%1, "#index", 2), %%mm1   \n\t" /*buf1[eax]*/\
    "movq 8(%0, "#index", 2), %%mm6  \n\t" /*buf0[eax]*/\
    "movq 8(%1, "#index", 2), %%mm7  \n\t" /*buf1[eax]*/\
    "psubw %%mm1, %%mm0              \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm7, %%mm6              \n\t" /* buf0[eax] - buf1[eax]*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "psraw $7, %%mm1                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "psraw $7, %%mm7                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "paddw %%mm0, %%mm1              \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw %%mm6, %%mm7              \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\

#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)

#define REAL_YSCALEYUV2RGB(index, c) \
    "xor "#index", "#index"          \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%2, "#index"), %%mm2      \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3      \n\t" /* uvbuf1[eax]*/\
    "movq 4096(%2, "#index"), %%mm5  \n\t" /* uvbuf0[eax+2048]*/\
    "movq 4096(%3, "#index"), %%mm4  \n\t" /* uvbuf1[eax+2048]*/\
    "psubw %%mm3, %%mm2              \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw %%mm4, %%mm5              \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
    "pmulhw %%mm0, %%mm2             \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "pmulhw %%mm0, %%mm5             \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
    "psraw $4, %%mm3                 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "psraw $4, %%mm4                 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "paddw %%mm2, %%mm3              \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
    "paddw %%mm5, %%mm4              \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
    "psubw "U_OFFSET"("#c"), %%mm3   \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4   \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2               \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5               \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3  \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4  \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm0   \n\t" /*buf0[eax]*/\
    "movq (%1, "#index", 2), %%mm1   \n\t" /*buf1[eax]*/\
    "movq 8(%0, "#index", 2), %%mm6  \n\t" /*buf0[eax]*/\
    "movq 8(%1, "#index", 2), %%mm7  \n\t" /*buf1[eax]*/\
    "psubw %%mm1, %%mm0              \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm7, %%mm6              \n\t" /* buf0[eax] - buf1[eax]*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "psraw $4, %%mm1                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "psraw $4, %%mm7                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "paddw %%mm0, %%mm1              \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw %%mm6, %%mm7              \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2  \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5  \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1   \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7   \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1   \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7   \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4              \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "movq %%mm5, %%mm6               \n\t"\
    "movq %%mm4, %%mm3               \n\t"\
    "punpcklwd %%mm2, %%mm2          \n\t"\
    "punpcklwd %%mm5, %%mm5          \n\t"\
    "punpcklwd %%mm4, %%mm4          \n\t"\
    "paddw %%mm1, %%mm2              \n\t"\
    "paddw %%mm1, %%mm5              \n\t"\
    "paddw %%mm1, %%mm4              \n\t"\
    "punpckhwd %%mm0, %%mm0          \n\t"\
    "punpckhwd %%mm6, %%mm6          \n\t"\
    "punpckhwd %%mm3, %%mm3          \n\t"\
    "paddw %%mm7, %%mm0              \n\t"\
    "paddw %%mm7, %%mm6              \n\t"\
    "paddw %%mm7, %%mm3              \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2           \n\t"\
    "packuswb %%mm6, %%mm5           \n\t"\
    "packuswb %%mm3, %%mm4           \n\t"\
    "pxor %%mm7, %%mm7               \n\t"
#define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)

#define REAL_YSCALEYUV2PACKED1(index, c) \
    "xor "#index", "#index"          \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%2, "#index"), %%mm3      \n\t" /* uvbuf0[eax]*/\
    "movq 4096(%2, "#index"), %%mm4  \n\t" /* uvbuf0[eax+2048]*/\
    "psraw $7, %%mm3                 \n\t" \
    "psraw $7, %%mm4                 \n\t" \
    "movq (%0, "#index", 2), %%mm1   \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7  \n\t" /*buf0[eax]*/\
    "psraw $7, %%mm1                 \n\t" \
    "psraw $7, %%mm7                 \n\t" \

#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)

#define REAL_YSCALEYUV2RGB1(index, c) \
    "xor "#index", "#index"          \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%2, "#index"), %%mm3      \n\t" /* uvbuf0[eax]*/\
    "movq 4096(%2, "#index"), %%mm4  \n\t" /* uvbuf0[eax+2048]*/\
    "psraw $4, %%mm3                 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "psraw $4, %%mm4                 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "psubw "U_OFFSET"("#c"), %%mm3   \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4   \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2               \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5               \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3  \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4  \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm1   \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7  \n\t" /*buf0[eax]*/\
    "psraw $4, %%mm1                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "psraw $4, %%mm7                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2  \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5  \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1   \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7   \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1   \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7   \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4              \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "movq %%mm5, %%mm6               \n\t"\
    "movq %%mm4, %%mm3               \n\t"\
    "punpcklwd %%mm2, %%mm2          \n\t"\
    "punpcklwd %%mm5, %%mm5          \n\t"\
    "punpcklwd %%mm4, %%mm4          \n\t"\
    "paddw %%mm1, %%mm2              \n\t"\
    "paddw %%mm1, %%mm5              \n\t"\
    "paddw %%mm1, %%mm4              \n\t"\
    "punpckhwd %%mm0, %%mm0          \n\t"\
    "punpckhwd %%mm6, %%mm6          \n\t"\
    "punpckhwd %%mm3, %%mm3          \n\t"\
    "paddw %%mm7, %%mm0              \n\t"\
    "paddw %%mm7, %%mm6              \n\t"\
    "paddw %%mm7, %%mm3              \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2           \n\t"\
    "packuswb %%mm6, %%mm5           \n\t"\
    "packuswb %%mm3, %%mm4           \n\t"\
    "pxor %%mm7, %%mm7               \n\t"
#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)

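/* Note (inferred from the register usage): the *1/*1b variants read a single
   set of input lines instead of blending two; *1 uses uvbuf0 as-is, while
   *1b below averages uvbuf0 and uvbuf1 for the chroma. */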
#define REAL_YSCALEYUV2PACKED1b(index, c) \
    "xor "#index", "#index"          \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%2, "#index"), %%mm2      \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3      \n\t" /* uvbuf1[eax]*/\
    "movq 4096(%2, "#index"), %%mm5  \n\t" /* uvbuf0[eax+2048]*/\
    "movq 4096(%3, "#index"), %%mm4  \n\t" /* uvbuf1[eax+2048]*/\
    "paddw %%mm2, %%mm3              \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
    "paddw %%mm5, %%mm4              \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
    "psrlw $8, %%mm3                 \n\t" \
    "psrlw $8, %%mm4                 \n\t" \
    "movq (%0, "#index", 2), %%mm1   \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7  \n\t" /*buf0[eax]*/\
    "psraw $7, %%mm1                 \n\t" \
    "psraw $7, %%mm7                 \n\t"
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)

// do vertical chrominance interpolation
#define REAL_YSCALEYUV2RGB1b(index, c) \
    "xor "#index", "#index"          \n\t"\
    ASMALIGN(4)\
    "1:                              \n\t"\
    "movq (%2, "#index"), %%mm2      \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3      \n\t" /* uvbuf1[eax]*/\
    "movq 4096(%2, "#index"), %%mm5  \n\t" /* uvbuf0[eax+2048]*/\
    "movq 4096(%3, "#index"), %%mm4  \n\t" /* uvbuf1[eax+2048]*/\
    "paddw %%mm2, %%mm3              \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
    "paddw %%mm5, %%mm4              \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
    "psrlw $5, %%mm3                 \n\t" /*FIXME might overflow*/\
    "psrlw $5, %%mm4                 \n\t" /*FIXME might overflow*/\
    "psubw "U_OFFSET"("#c"), %%mm3   \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"("#c"), %%mm4   \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2               \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5               \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3  \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4  \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm1   \n\t" /*buf0[eax]*/\
    "movq 8(%0, "#index", 2), %%mm7  \n\t" /*buf0[eax]*/\
    "psraw $4, %%mm1                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "psraw $4, %%mm7                 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2  \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5  \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1   \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"("#c"), %%mm7   \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"("#c"), %%mm1   \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7   \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4              \n\t"\
    "movq %%mm2, %%mm0               \n\t"\
    "movq %%mm5, %%mm6               \n\t"\
    "movq %%mm4, %%mm3               \n\t"\
    "punpcklwd %%mm2, %%mm2          \n\t"\
    "punpcklwd %%mm5, %%mm5          \n\t"\
    "punpcklwd %%mm4, %%mm4          \n\t"\
    "paddw %%mm1, %%mm2              \n\t"\
    "paddw %%mm1, %%mm5              \n\t"\
    "paddw %%mm1, %%mm4              \n\t"\
    "punpckhwd %%mm0, %%mm0          \n\t"\
    "punpckhwd %%mm6, %%mm6          \n\t"\
    "punpckhwd %%mm3, %%mm3          \n\t"\
    "paddw %%mm7, %%mm0              \n\t"\
    "paddw %%mm7, %%mm6              \n\t"\
    "paddw %%mm7, %%mm3              \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2           \n\t"\
    "packuswb %%mm6, %%mm5           \n\t"\
    "packuswb %%mm3, %%mm4           \n\t"\
    "pxor %%mm7, %%mm7               \n\t"
#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)

#define REAL_WRITEBGR32(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1               \n\t" /* B */\
    "movq %%mm5, %%mm6               \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2          \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5          \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1          \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6          \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0               \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3               \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0          \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2          \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1          \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3          \n\t" /* 0RGB0RGB 3 */\
    \
    MOVNTQ(%%mm0, (dst, index, 4))\
    MOVNTQ(%%mm2, 8(dst, index, 4))\
    MOVNTQ(%%mm1, 16(dst, index, 4))\
    MOVNTQ(%%mm3, 24(dst, index, 4))\
    \
    "add $8, "#index"                \n\t"\
    "cmp "#dstw", "#index"           \n\t"\
    " jb 1b                          \n\t"
#define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index)
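/* Little-endian sketch of the resulting pixels (assumption, reference only):
       ((uint32_t *)dst)[i] = B | (G << 8) | ((uint32_t)R << 16);
   i.e. the "0RGB" dwords built by the punpck sequence above. */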

#define REAL_WRITEBGR16(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2       \n\t" /* B */\
    "pand "MANGLE(bFC)", %%mm4       \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5       \n\t" /* R */\
    "psrlq $3, %%mm2                 \n\t"\
    \
    "movq %%mm2, %%mm1               \n\t"\
    "movq %%mm4, %%mm3               \n\t"\
    \
    "punpcklbw %%mm7, %%mm3          \n\t"\
    "punpcklbw %%mm5, %%mm2          \n\t"\
    "punpckhbw %%mm7, %%mm4          \n\t"\
    "punpckhbw %%mm5, %%mm1          \n\t"\
    \
    "psllq $3, %%mm3                 \n\t"\
    "psllq $3, %%mm4                 \n\t"\
    \
    "por %%mm3, %%mm2                \n\t"\
    "por %%mm4, %%mm1                \n\t"\
    \
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
    \
    "add $8, "#index"                \n\t"\
    "cmp "#dstw", "#index"           \n\t"\
    " jb 1b                          \n\t"
#define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index)
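/* The masks and shifts above amount to RGB565 packing (sketch, inferred
   from the bF8/bFC masks):
       pixel = (B >> 3) | ((G >> 2) << 5) | ((R >> 3) << 11);
   WRITEBGR15 below is the same idea with 5 bits per channel. */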

#define REAL_WRITEBGR15(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2       \n\t" /* B */\
    "pand "MANGLE(bF8)", %%mm4       \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5       \n\t" /* R */\
    "psrlq $3, %%mm2                 \n\t"\
    "psrlq $1, %%mm5                 \n\t"\
    \
    "movq %%mm2, %%mm1               \n\t"\
    "movq %%mm4, %%mm3               \n\t"\
    \
    "punpcklbw %%mm7, %%mm3          \n\t"\
    "punpcklbw %%mm5, %%mm2          \n\t"\
    "punpckhbw %%mm7, %%mm4          \n\t"\
    "punpckhbw %%mm5, %%mm1          \n\t"\
    \
    "psllq $2, %%mm3                 \n\t"\
    "psllq $2, %%mm4                 \n\t"\
    \
    "por %%mm3, %%mm2                \n\t"\
    "por %%mm4, %%mm1                \n\t"\
    \
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
    \
    "add $8, "#index"                \n\t"\
    "cmp "#dstw", "#index"           \n\t"\
    " jb 1b                          \n\t"
#define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index)

#define WRITEBGR24OLD(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1               \n\t" /* B */\
    "movq %%mm5, %%mm6               \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2          \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5          \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1          \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6          \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0               \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3               \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0          \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2          \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1          \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3          \n\t" /* 0RGB0RGB 3 */\
    \
    "movq %%mm0, %%mm4               \n\t" /* 0RGB0RGB 0 */\
    "psrlq $8, %%mm0                 \n\t" /* 00RGB0RG 0 */\
    "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
    "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
    "por %%mm4, %%mm0                \n\t" /* 00RGBRGB 0 */\
    "movq %%mm2, %%mm4               \n\t" /* 0RGB0RGB 1 */\
    "psllq $48, %%mm2                \n\t" /* GB000000 1 */\
    "por %%mm2, %%mm0                \n\t" /* GBRGBRGB 0 */\
    \
    "movq %%mm4, %%mm2               \n\t" /* 0RGB0RGB 1 */\
    "psrld $16, %%mm4                \n\t" /* 000R000R 1 */\
    "psrlq $24, %%mm2                \n\t" /* 0000RGB0 1.5 */\
    "por %%mm4, %%mm2                \n\t" /* 000RRGBR 1 */\
    "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
    "movq %%mm1, %%mm4               \n\t" /* 0RGB0RGB 2 */\
    "psrlq $8, %%mm1                 \n\t" /* 00RGB0RG 2 */\
    "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
    "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
    "por %%mm4, %%mm1                \n\t" /* 00RGBRGB 2 */\
    "movq %%mm1, %%mm4               \n\t" /* 00RGBRGB 2 */\
    "psllq $32, %%mm1                \n\t" /* BRGB0000 2 */\
    "por %%mm1, %%mm2                \n\t" /* BRGBRGBR 1 */\
    \
    "psrlq $32, %%mm4                \n\t" /* 000000RG 2.5 */\
    "movq %%mm3, %%mm5               \n\t" /* 0RGB0RGB 3 */\
    "psrlq $8, %%mm3                 \n\t" /* 00RGB0RG 3 */\
    "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
    "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
    "por %%mm5, %%mm3                \n\t" /* 00RGBRGB 3 */\
    "psllq $16, %%mm3                \n\t" /* RGBRGB00 3 */\
    "por %%mm4, %%mm3                \n\t" /* RGBRGBRG 2.5 */\
    \
    MOVNTQ(%%mm0, (dst))\
    MOVNTQ(%%mm2, 8(dst))\
    MOVNTQ(%%mm3, 16(dst))\
    "add $24, "#dst"                 \n\t"\
    \
    "add $8, "#index"                \n\t"\
    "cmp "#dstw", "#index"           \n\t"\
    " jb 1b                          \n\t"
800 | |
801 #define WRITEBGR24MMX(dst, dstw, index) \ | |
802 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
803 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
804 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
805 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
806 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
807 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
808 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
809 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
810 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
811 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
812 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
813 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
814 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
815 \ | |
816 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
817 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ | |
818 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ | |
819 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ | |
820 \ | |
821 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ | |
822 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ | |
823 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ | |
824 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ | |
825 \ | |
826 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ | |
827 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ | |
828 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ | |
829 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ | |
830 \ | |
831 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ | |
832 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ | |
833 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ | |
834 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ | |
835 MOVNTQ(%%mm0, (dst))\ | |
836 \ | |
837 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ | |
838 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ | |
839 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ | |
840 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ | |
841 MOVNTQ(%%mm6, 8(dst))\ | |
842 \ | |
843 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ | |
844 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ | |
845 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ | |
846 MOVNTQ(%%mm5, 16(dst))\ | |
847 \ | |
848 "add $24, "#dst" \n\t"\ | |
849 \ | |
850 "add $8, "#index" \n\t"\ | |
851 "cmp "#dstw", "#index" \n\t"\ | |
852 " jb 1b \n\t" | |
853 | |
854 #define WRITEBGR24MMX2(dst, dstw, index) \ | |
855 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
856 "movq "MANGLE(M24A)", %%mm0 \n\t"\ | |
857 "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |
858 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ | |
859 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | |
860 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | |
861 \ | |
862 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ | |
863 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ | |
864 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ | |
865 \ | |
866 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ | |
867 "por %%mm1, %%mm6 \n\t"\ | |
868 "por %%mm3, %%mm6 \n\t"\ | |
869 MOVNTQ(%%mm6, (dst))\ | |
870 \ | |
871 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ | |
872 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ | |
873 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | |
874 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | |
875 \ | |
876 "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ | |
877 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ | |
878 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | |
879 \ | |
880 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ | |
881 "por %%mm3, %%mm6 \n\t"\ | |
882 MOVNTQ(%%mm6, 8(dst))\ | |
883 \ | |
884 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ | |
885 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ | |
886 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ | |
887 \ | |
888 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | |
889 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | |
890 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ | |
891 \ | |
892 "por %%mm1, %%mm3 \n\t"\ | |
893 "por %%mm3, %%mm6 \n\t"\ | |
894 MOVNTQ(%%mm6, 16(dst))\ | |
895 \ | |
896 "add $24, "#dst" \n\t"\ | |
897 \ | |
898 "add $8, "#index" \n\t"\ | |
899 "cmp "#dstw", "#index" \n\t"\ | |
900 " jb 1b \n\t" | |
901 | |
902 #ifdef HAVE_MMX2 | |
903 #undef WRITEBGR24 | |
904 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) | |
905 #else | |
906 #undef WRITEBGR24 | |
907 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) | |
908 #endif | |
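/* 24bpp is the awkward case for 64-bit registers: 8 pixels span 24 bytes,
   exactly three MOVNTQ stores per iteration. The MMX2 variant builds each
   output quadword directly with pshufw and the M24A/M24B/M24C masks; the
   plain MMX variant reaches the same byte order via shifts and ors. The
   layout produced is simply (scalar sketch):

       dst[3*i+0] = b; dst[3*i+1] = g; dst[3*i+2] = r;
*/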
909 | |
910 #define REAL_WRITEYUY2(dst, dstw, index) \ | |
911 "packuswb %%mm3, %%mm3 \n\t"\ | |
912 "packuswb %%mm4, %%mm4 \n\t"\ | |
913 "packuswb %%mm7, %%mm1 \n\t"\ | |
914 "punpcklbw %%mm4, %%mm3 \n\t"\ | |
915 "movq %%mm1, %%mm7 \n\t"\ | |
916 "punpcklbw %%mm3, %%mm1 \n\t"\ | |
917 "punpckhbw %%mm3, %%mm7 \n\t"\ | |
918 \ | |
919 MOVNTQ(%%mm1, (dst, index, 2))\ | |
920 MOVNTQ(%%mm7, 8(dst, index, 2))\ | |
921 \ | |
922 "add $8, "#index" \n\t"\ | |
923 "cmp "#dstw", "#index" \n\t"\ | |
924 " jb 1b \n\t" | |
925 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) | |
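/* WRITEYUY2 emits packed 4:2:2: each pixel pair becomes the four bytes
   Y0 U Y1 V. Scalar sketch of the punpcklbw cascade above:

       dst[4*i+0] = y0; dst[4*i+1] = u; dst[4*i+2] = y1; dst[4*i+3] = v;
*/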
926 | |
927 | |
928 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
929 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
930 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) | |
931 { | |
932 #ifdef HAVE_MMX | |
933 if(c->flags & SWS_ACCURATE_RND){ |
934 if(uDest){ |
935 YSCALEYUV2YV12X_ACCURATE( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW) |
936 YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW) |
937 } |
938 |
939 YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW) |
940 }else{ |
941 if(uDest){ |
942 YSCALEYUV2YV12X( 0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW) |
943 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW) |
944 } |
945 |
946 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW) |
947 } |
18861 | 948 #else |
949 #ifdef HAVE_ALTIVEC | |
950 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, | |
951 chrFilter, chrSrc, chrFilterSize, | |
952 dest, uDest, vDest, dstW, chrDstW); | |
953 #else //HAVE_ALTIVEC | |
954 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, | |
955 chrFilter, chrSrc, chrFilterSize, | |
956 dest, uDest, vDest, dstW, chrDstW); | |
957 #endif //!HAVE_ALTIVEC | |
958 #endif | |
959 } | |
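/* Per output pixel the function above computes a small vertical FIR over
   lumFilterSize (resp. chrFilterSize) source lines. A scalar sketch of
   the luma case, with rounding and shift as in the yuv2yuvXinC fallback:

       int val = 1 << 18;                          // rounding constant
       for(j=0; j<lumFilterSize; j++)
           val += lumSrc[j][i] * lumFilter[j];     // 16x16 -> 32 bit MAC
       val >>= 19;
       dest[i] = val < 0 ? 0 : (val > 255 ? 255 : val);
*/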
960 | |
961 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
962 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
963 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) | |
964 { | |
965 yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, | |
966 chrFilter, chrSrc, chrFilterSize, | |
967 dest, uDest, dstW, chrDstW, dstFormat); | |
968 } | |
969 | |
970 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, | |
971 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) | |
972 { | |
973 #ifdef HAVE_MMX | |
974 if(uDest != NULL) | |
975 { | |
976 asm volatile( | |
977 YSCALEYUV2YV121 | |
978 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), | |
979 "g" (-chrDstW) | |
980 : "%"REG_a | |
981 ); | |
982 | |
983 asm volatile( | |
984 YSCALEYUV2YV121 | |
985 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), | |
986 "g" (-chrDstW) | |
987 : "%"REG_a | |
988 ); | |
989 } | |
990 | |
991 asm volatile( | |
992 YSCALEYUV2YV121 | |
993 :: "r" (lumSrc + dstW), "r" (dest + dstW), | |
994 "g" (-dstW) | |
995 : "%"REG_a | |
996 ); | |
997 #else | |
998 int i; | |
999 for(i=0; i<dstW; i++) | |
1000 { | |
1001 int val= lumSrc[i]>>7; | |
1002 | |
1003 if(val&256){ | |
1004 if(val<0) val=0; | |
1005 else val=255; | |
1006 } | |
1007 | |
1008 dest[i]= val; | |
1009 } | |
1010 | |
1011 if(uDest != NULL) | |
1012 for(i=0; i<chrDstW; i++) | |
1013 { | |
1014 int u=chrSrc[i]>>7; | |
1015 int v=chrSrc[i + 2048]>>7; | |
1016 | |
1017 if((u|v)&256){ | |
1018 if(u<0) u=0; | |
1019 else if (u>255) u=255; | |
1020 if(v<0) v=0; | |
1021 else if (v>255) v=255; | |
1022 } | |
1023 | |
1024 uDest[i]= u; | |
1025 vDest[i]= v; | |
1026 } | |
1027 #endif | |
1028 } | |
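/* Note on the val&256 test in the C path above: after >>7 an int16_t
   source yields values in [-256, 255], and bit 8 is set exactly when the
   result falls outside 0..255, so a single AND replaces both range
   compares in the common in-range case. */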
1029 | |
1030 | |
1031 /** | |
1032 * vertical scale YV12 to RGB | |
1033 */ | |
1034 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
1035 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
1036 uint8_t *dest, long dstW, long dstY) | |
1037 { | |
1038 long dummy=0; | |
1039 #ifdef HAVE_MMX |
1040 if(c->flags & SWS_ACCURATE_RND){ |
1041 switch(c->dstFormat){ |
1042 case IMGFMT_BGR32: |
19173 | 1043 YSCALEYUV2PACKEDX_ACCURATE |
1044 YSCALEYUV2RGBX | |
1045 WRITEBGR32(%4, %5, %%REGa) |
1046 |
19173 | 1047 YSCALEYUV2PACKEDX_END |
1048 return; | |
1049 case IMGFMT_BGR24: | |
1050 YSCALEYUV2PACKEDX_ACCURATE | |
1051 YSCALEYUV2RGBX | |
19396 | 1052 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize |
1053 "add %4, %%"REG_c" \n\t" | |
1054 WRITEBGR24(%%REGc, %5, %%REGa) | |
19173 | 1055 |
1056 | |
1057 :: "r" (&c->redDither), |
1058 "m" (dummy), "m" (dummy), "m" (dummy), |
1059 "r" (dest), "m" (dstW) |
1060 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S |
1061 ); |
1062 return; |
19173 | 1063 case IMGFMT_BGR15: |
1064 YSCALEYUV2PACKEDX_ACCURATE | |
1065 YSCALEYUV2RGBX | |
1066 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1067 #ifdef DITHER1XBPP | |
1068 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1069 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1070 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1071 #endif | |
1072 | |
1073 WRITEBGR15(%4, %5, %%REGa) | |
1074 YSCALEYUV2PACKEDX_END | |
1075 return; | |
1076 case IMGFMT_BGR16: | |
1077 YSCALEYUV2PACKEDX_ACCURATE | |
1078 YSCALEYUV2RGBX | |
1079 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1080 #ifdef DITHER1XBPP | |
1081 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1082 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1083 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1084 #endif | |
1085 | |
1086 WRITEBGR16(%4, %5, %%REGa) | |
1087 YSCALEYUV2PACKEDX_END | |
1088 return; | |
1089 case IMGFMT_YUY2: | |
1090 YSCALEYUV2PACKEDX_ACCURATE | |
1091 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1092 | |
1093 "psraw $3, %%mm3 \n\t" | |
1094 "psraw $3, %%mm4 \n\t" | |
1095 "psraw $3, %%mm1 \n\t" | |
1096 "psraw $3, %%mm7 \n\t" | |
1097 WRITEYUY2(%4, %5, %%REGa) | |
1098 YSCALEYUV2PACKEDX_END | |
1099 return; | |
1100 } | |
1101 }else{ | |
1102 switch(c->dstFormat) | |
1103 { | |
1104 case IMGFMT_BGR32: | |
1105 YSCALEYUV2PACKEDX | |
1106 YSCALEYUV2RGBX | |
1107 WRITEBGR32(%4, %5, %%REGa) | |
1108 YSCALEYUV2PACKEDX_END | |
1109 return; | |
1110 case IMGFMT_BGR24: | |
1111 YSCALEYUV2PACKEDX | |
1112 YSCALEYUV2RGBX | |
19396 | 1113 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize |
1114 "add %4, %%"REG_c" \n\t" | |
1115 WRITEBGR24(%%REGc, %5, %%REGa) | |
1116 |
1117 :: "r" (&c->redDither), |
1118 "m" (dummy), "m" (dummy), "m" (dummy), |
1119 "r" (dest), "m" (dstW) |
1120 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S |
1121 ); |
19173 | 1122 return; |
1123 case IMGFMT_BGR15: | |
1124 YSCALEYUV2PACKEDX | |
1125 YSCALEYUV2RGBX | |
1126 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1127 #ifdef DITHER1XBPP |
1128 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1129 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" |
1130 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1131 #endif |
1132 |
1133 WRITEBGR15(%4, %5, %%REGa) |
19173 | 1134 YSCALEYUV2PACKEDX_END |
1135 return; | |
1136 case IMGFMT_BGR16: | |
1137 YSCALEYUV2PACKEDX | |
1138 YSCALEYUV2RGBX | |
1139 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1140 #ifdef DITHER1XBPP |
1141 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1142 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" |
1143 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1144 #endif |
1145 |
1146 WRITEBGR16(%4, %5, %%REGa) |
1147 YSCALEYUV2PACKEDX_END |
1148 return; |
18861 | 1149 case IMGFMT_YUY2: |
1150 YSCALEYUV2PACKEDX | |
1151 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1152 | |
1153 "psraw $3, %%mm3 \n\t" | |
1154 "psraw $3, %%mm4 \n\t" | |
1155 "psraw $3, %%mm1 \n\t" | |
1156 "psraw $3, %%mm7 \n\t" | |
1157 WRITEYUY2(%4, %5, %%REGa) | |
19173 | 1158 YSCALEYUV2PACKEDX_END |
1159 return; |
1160 } |
1161 } |
18861 | 1162 #endif |
1163 #ifdef HAVE_ALTIVEC | |
1164 /* The following list of supported dstFormat values should | |
1165 match what's found in the body of altivec_yuv2packedX() */ | |
1166 if(c->dstFormat==IMGFMT_ABGR || c->dstFormat==IMGFMT_BGRA || | |
1167 c->dstFormat==IMGFMT_BGR24 || c->dstFormat==IMGFMT_RGB24 || | |
1168 c->dstFormat==IMGFMT_RGBA || c->dstFormat==IMGFMT_ARGB) | |
1169 altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize, | |
1170 chrFilter, chrSrc, chrFilterSize, | |
1171 dest, dstW, dstY); | |
1172 else | |
1173 #endif | |
1174 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, | |
1175 chrFilter, chrSrc, chrFilterSize, | |
1176 dest, dstW, dstY); | |
1177 } | |
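/* In the asm blocks above, operand %4 is the destination pointer and %5
   is dstW. The BGR24 cases need the extra lea/add because 3 bytes per
   pixel matches no x86 addressing scale: "lea (a,a,2)" forms 3*index and
   the following add turns REG_c into dest + 3*index. */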
1178 | |
1179 /** | |
1180 * vertical bilinear scale YV12 to RGB | |
1181 */ | |
1182 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, | |
1183 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) | |
1184 { | |
1185 int yalpha1=yalpha^4095; | |
1186 int uvalpha1=uvalpha^4095; | |
1187 int i; | |
1188 | |
1189 #if 0 //isn't used | |
1190 if(flags&SWS_FULL_CHR_H_INT) | |
1191 { | |
1192 switch(dstFormat) | |
1193 { | |
1194 #ifdef HAVE_MMX | |
1195 case IMGFMT_BGR32: | |
1196 asm volatile( | |
1197 | |
1198 | |
1199 FULL_YSCALEYUV2RGB | |
1200 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG | |
1201 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 | |
1202 | |
1203 "movq %%mm3, %%mm1 \n\t" | |
1204 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 | |
1205 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 | |
1206 | |
1207 MOVNTQ(%%mm3, (%4, %%REGa, 4)) | |
1208 MOVNTQ(%%mm1, 8(%4, %%REGa, 4)) | |
1209 | |
1210 "add $4, %%"REG_a" \n\t" | |
1211 "cmp %5, %%"REG_a" \n\t" | |
1212 " jb 1b \n\t" | |
1213 | |
1214 | |
1215 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW), | |
1216 "m" (yalpha1), "m" (uvalpha1) | |
1217 : "%"REG_a | |
1218 ); | |
1219 break; | |
1220 case IMGFMT_BGR24: | |
1221 asm volatile( | |
1222 | |
1223 FULL_YSCALEYUV2RGB | |
1224 | |
1225 // lsb ... msb | |
1226 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG | |
1227 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 | |
1228 | |
1229 "movq %%mm3, %%mm1 \n\t" | |
1230 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 | |
1231 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 | |
1232 | |
1233 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 | |
1234 "psrlq $8, %%mm3 \n\t" // GR0BGR00 | |
1235 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000 | |
1236 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00 | |
1237 "por %%mm2, %%mm3 \n\t" // BGRBGR00 | |
1238 "movq %%mm1, %%mm2 \n\t" | |
1239 "psllq $48, %%mm1 \n\t" // 000000BG | |
1240 "por %%mm1, %%mm3 \n\t" // BGRBGRBG | |
1241 | |
1242 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 | |
1243 "psrld $16, %%mm2 \n\t" // R000R000 | |
1244 "psrlq $24, %%mm1 \n\t" // 0BGR0000 | |
1245 "por %%mm2, %%mm1 \n\t" // RBGRR000 | |
1246 | |
1247 "mov %4, %%"REG_b" \n\t" | |
1248 "add %%"REG_a", %%"REG_b" \n\t" | |
1249 | |
1250 #ifdef HAVE_MMX2 | |
1251 //FIXME Alignment | |
1252 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t" | |
1253 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t" | |
1254 #else | |
1255 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" | |
1256 "psrlq $32, %%mm3 \n\t" | |
1257 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t" | |
1258 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" | |
1259 #endif | |
1260 "add $4, %%"REG_a" \n\t" | |
1261 "cmp %5, %%"REG_a" \n\t" | |
1262 " jb 1b \n\t" | |
1263 | |
1264 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), | |
1265 "m" (yalpha1), "m" (uvalpha1) | |
1266 : "%"REG_a, "%"REG_b | |
1267 ); | |
1268 break; | |
1269 case IMGFMT_BGR15: | |
1270 asm volatile( | |
1271 | |
1272 FULL_YSCALEYUV2RGB | |
1273 #ifdef DITHER1XBPP | |
1274 "paddusb "MANGLE(g5Dither)", %%mm1\n\t" | |
1275 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1276 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
1277 #endif | |
1278 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G | |
1279 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B | |
1280 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R | |
1281 | |
1282 "psrlw $3, %%mm3 \n\t" | |
1283 "psllw $2, %%mm1 \n\t" | |
1284 "psllw $7, %%mm0 \n\t" | |
1285 "pand "MANGLE(g15Mask)", %%mm1 \n\t" | |
1286 "pand "MANGLE(r15Mask)", %%mm0 \n\t" | |
1287 | |
1288 "por %%mm3, %%mm1 \n\t" | |
1289 "por %%mm1, %%mm0 \n\t" | |
1290 | |
1291 MOVNTQ(%%mm0, (%4, %%REGa, 2)) | |
1292 | |
1293 "add $4, %%"REG_a" \n\t" | |
1294 "cmp %5, %%"REG_a" \n\t" | |
1295 " jb 1b \n\t" | |
1296 | |
1297 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1298 "m" (yalpha1), "m" (uvalpha1) | |
1299 : "%"REG_a | |
1300 ); | |
1301 break; | |
1302 case IMGFMT_BGR16: | |
1303 asm volatile( | |
1304 | |
1305 FULL_YSCALEYUV2RGB | |
1306 #ifdef DITHER1XBPP | |
1307 "paddusb "MANGLE(g6Dither)", %%mm1\n\t" | |
1308 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1309 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
1310 #endif | |
1311 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G | |
1312 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B | |
1313 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R | |
1314 | |
1315 "psrlw $3, %%mm3 \n\t" | |
1316 "psllw $3, %%mm1 \n\t" | |
1317 "psllw $8, %%mm0 \n\t" | |
1318 "pand "MANGLE(g16Mask)", %%mm1 \n\t" | |
1319 "pand "MANGLE(r16Mask)", %%mm0 \n\t" | |
1320 | |
1321 "por %%mm3, %%mm1 \n\t" | |
1322 "por %%mm1, %%mm0 \n\t" | |
1323 | |
1324 MOVNTQ(%%mm0, (%4, %%REGa, 2)) | |
1325 | |
1326 "add $4, %%"REG_a" \n\t" | |
1327 "cmp %5, %%"REG_a" \n\t" | |
1328 " jb 1b \n\t" | |
1329 | |
1330 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1331 "m" (yalpha1), "m" (uvalpha1) | |
1332 : "%"REG_a | |
1333 ); | |
1334 break; | |
1335 #endif | |
1336 case IMGFMT_RGB32: | |
1337 #ifndef HAVE_MMX | |
1338 case IMGFMT_BGR32: | |
1339 #endif | |
1340 if(dstFormat==IMGFMT_BGR32) | |
1341 { | |
1342 int i; | |
1343 #ifdef WORDS_BIGENDIAN | |
1344 dest++; | |
1345 #endif | |
1346 for(i=0;i<dstW;i++){ | |
1347 // vertical linear interpolation && yuv2rgb in a single step: | |
1348 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1349 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1350 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1351 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; | |
1352 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1353 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
1354 dest+= 4; | |
1355 } | |
1356 } | |
1357 else if(dstFormat==IMGFMT_BGR24) | |
1358 { | |
1359 int i; | |
1360 for(i=0;i<dstW;i++){ | |
1361 // vertical linear interpolation && yuv2rgb in a single step: | |
1362 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1363 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1364 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1365 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; | |
1366 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1367 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
1368 dest+= 3; | |
1369 } | |
1370 } | |
1371 else if(dstFormat==IMGFMT_BGR16) | |
1372 { | |
1373 int i; | |
1374 for(i=0;i<dstW;i++){ | |
1375 // vertical linear interpolation && yuv2rgb in a single step: | |
1376 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1377 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1378 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1379 | |
1380 ((uint16_t*)dest)[i] = | |
1381 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | | |
1382 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1383 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
1384 } | |
1385 } | |
1386 else if(dstFormat==IMGFMT_BGR15) | |
1387 { | |
1388 int i; | |
1389 for(i=0;i<dstW;i++){ | |
1390 // vertical linear interpolation && yuv2rgb in a single step: | |
1391 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1392 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1393 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1394 | |
1395 ((uint16_t*)dest)[i] = | |
1396 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | | |
1397 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1398 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
1399 } | |
1400 } | |
1401 }//FULL_UV_IPOL | |
1402 else | |
1403 { | |
1404 #endif // if 0 | |
1405 #ifdef HAVE_MMX | |
1406 switch(c->dstFormat) | |
1407 { | |
1408 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( | |
1409 case IMGFMT_BGR32: | |
1410 asm volatile( | |
1411 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1412 "mov %4, %%"REG_b" \n\t" | |
1413 "push %%"REG_BP" \n\t" | |
1414 YSCALEYUV2RGB(%%REGBP, %5) | |
1415 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | |
1416 "pop %%"REG_BP" \n\t" | |
1417 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1418 | |
1419 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1420 "a" (&c->redDither) | |
1421 ); | |
1422 return; | |
1423 case IMGFMT_BGR24: | |
1424 asm volatile( | |
1425 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1426 "mov %4, %%"REG_b" \n\t" | |
1427 "push %%"REG_BP" \n\t" | |
1428 YSCALEYUV2RGB(%%REGBP, %5) | |
1429 WRITEBGR24(%%REGb, 8280(%5), %%REGBP) | |
1430 "pop %%"REG_BP" \n\t" | |
1431 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1432 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1433 "a" (&c->redDither) | |
1434 ); | |
1435 return; | |
1436 case IMGFMT_BGR15: | |
1437 asm volatile( | |
1438 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1439 "mov %4, %%"REG_b" \n\t" | |
1440 "push %%"REG_BP" \n\t" | |
1441 YSCALEYUV2RGB(%%REGBP, %5) | |
1442 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1443 #ifdef DITHER1XBPP | |
1444 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1445 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1446 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1447 #endif | |
1448 | |
1449 WRITEBGR15(%%REGb, 8280(%5), %%REGBP) | |
1450 "pop %%"REG_BP" \n\t" | |
1451 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1452 | |
1453 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1454 "a" (&c->redDither) | |
1455 ); | |
1456 return; | |
1457 case IMGFMT_BGR16: | |
1458 asm volatile( | |
1459 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1460 "mov %4, %%"REG_b" \n\t" | |
1461 "push %%"REG_BP" \n\t" | |
1462 YSCALEYUV2RGB(%%REGBP, %5) | |
1463 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1464 #ifdef DITHER1XBPP | |
1465 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1466 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1467 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1468 #endif | |
1469 | |
1470 WRITEBGR16(%%REGb, 8280(%5), %%REGBP) | |
1471 "pop %%"REG_BP" \n\t" | |
1472 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1473 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1474 "a" (&c->redDither) | |
1475 ); | |
1476 return; | |
1477 case IMGFMT_YUY2: | |
1478 asm volatile( | |
1479 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1480 "mov %4, %%"REG_b" \n\t" | |
1481 "push %%"REG_BP" \n\t" | |
1482 YSCALEYUV2PACKED(%%REGBP, %5) | |
1483 WRITEYUY2(%%REGb, 8280(%5), %%REGBP) | |
1484 "pop %%"REG_BP" \n\t" | |
1485 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1486 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1487 "a" (&c->redDither) | |
1488 ); | |
1489 return; | |
1490 default: break; | |
1491 } | |
1492 #endif //HAVE_MMX | |
1493 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) | |
1494 } | |
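/* The vertical blend computed here as a scalar sketch: yalpha runs
   0..4095, and yalpha1 = yalpha^4095 is a cheap stand-in for 4096-yalpha
   (off by at most one), so per pixel

       Y = (buf0[i]*yalpha1 + buf1[i]*yalpha) >> 19;

   and likewise for U/V with uvalpha, before the YUV->RGB step. */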
1495 | |
1496 /** | |
1497 * YV12 to RGB without scaling or interpolating | |
1498 */ | |
1499 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, | |
1500 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) | |
1501 { | |
1502 const int yalpha1=0; | |
1503 int i; | |
1504 | |
1505 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 | |
1506 const int yalpha= 4096; //FIXME ... | |
1507 | |
1508 if(flags&SWS_FULL_CHR_H_INT) | |
1509 { | |
1510 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); | |
1511 return; | |
1512 } | |
1513 | |
1514 #ifdef HAVE_MMX | |
1515 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster |
1516 { | |
1517 switch(dstFormat) | |
1518 { | |
1519 case IMGFMT_BGR32: | |
1520 asm volatile( | |
1521 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1522 "mov %4, %%"REG_b" \n\t" | |
1523 "push %%"REG_BP" \n\t" | |
1524 YSCALEYUV2RGB1(%%REGBP, %5) | |
1525 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | |
1526 "pop %%"REG_BP" \n\t" | |
1527 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1528 | |
1529 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1530 "a" (&c->redDither) | |
1531 ); | |
1532 return; | |
1533 case IMGFMT_BGR24: | |
1534 asm volatile( | |
1535 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1536 "mov %4, %%"REG_b" \n\t" | |
1537 "push %%"REG_BP" \n\t" | |
1538 YSCALEYUV2RGB1(%%REGBP, %5) | |
1539 WRITEBGR24(%%REGb, 8280(%5), %%REGBP) | |
1540 "pop %%"REG_BP" \n\t" | |
1541 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1542 | |
1543 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1544 "a" (&c->redDither) | |
1545 ); | |
1546 return; | |
1547 case IMGFMT_BGR15: | |
1548 asm volatile( | |
1549 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1550 "mov %4, %%"REG_b" \n\t" | |
1551 "push %%"REG_BP" \n\t" | |
1552 YSCALEYUV2RGB1(%%REGBP, %5) | |
1553 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1554 #ifdef DITHER1XBPP | |
1555 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1556 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1557 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1558 #endif | |
1559 WRITEBGR15(%%REGb, 8280(%5), %%REGBP) | |
1560 "pop %%"REG_BP" \n\t" | |
1561 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1562 | |
1563 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1564 "a" (&c->redDither) | |
1565 ); | |
1566 return; | |
1567 case IMGFMT_BGR16: | |
1568 asm volatile( | |
1569 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1570 "mov %4, %%"REG_b" \n\t" | |
1571 "push %%"REG_BP" \n\t" | |
1572 YSCALEYUV2RGB1(%%REGBP, %5) | |
1573 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1574 #ifdef DITHER1XBPP | |
1575 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1576 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1577 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1578 #endif | |
1579 | |
1580 WRITEBGR16(%%REGb, 8280(%5), %%REGBP) | |
1581 "pop %%"REG_BP" \n\t" | |
1582 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1583 | |
1584 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1585 "a" (&c->redDither) | |
1586 ); | |
1587 return; | |
1588 case IMGFMT_YUY2: | |
1589 asm volatile( | |
1590 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1591 "mov %4, %%"REG_b" \n\t" | |
1592 "push %%"REG_BP" \n\t" | |
1593 YSCALEYUV2PACKED1(%%REGBP, %5) | |
1594 WRITEYUY2(%%REGb, 8280(%5), %%REGBP) | |
1595 "pop %%"REG_BP" \n\t" | |
1596 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1597 | |
1598 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1599 "a" (&c->redDither) | |
1600 ); | |
1601 return; | |
1602 } | |
1603 } | |
1604 else | |
1605 { | |
1606 switch(dstFormat) | |
1607 { | |
1608 case IMGFMT_BGR32: | |
1609 asm volatile( | |
1610 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1611 "mov %4, %%"REG_b" \n\t" | |
1612 "push %%"REG_BP" \n\t" | |
1613 YSCALEYUV2RGB1b(%%REGBP, %5) | |
1614 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | |
1615 "pop %%"REG_BP" \n\t" | |
1616 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1617 | |
1618 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1619 "a" (&c->redDither) | |
1620 ); | |
1621 return; | |
1622 case IMGFMT_BGR24: | |
1623 asm volatile( | |
1624 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1625 "mov %4, %%"REG_b" \n\t" | |
1626 "push %%"REG_BP" \n\t" | |
1627 YSCALEYUV2RGB1b(%%REGBP, %5) | |
1628 WRITEBGR24(%%REGb, 8280(%5), %%REGBP) | |
1629 "pop %%"REG_BP" \n\t" | |
1630 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1631 | |
1632 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1633 "a" (&c->redDither) | |
1634 ); | |
1635 return; | |
1636 case IMGFMT_BGR15: | |
1637 asm volatile( | |
1638 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1639 "mov %4, %%"REG_b" \n\t" | |
1640 "push %%"REG_BP" \n\t" | |
1641 YSCALEYUV2RGB1b(%%REGBP, %5) | |
1642 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1643 #ifdef DITHER1XBPP | |
1644 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1645 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1646 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1647 #endif | |
1648 WRITEBGR15(%%REGb, 8280(%5), %%REGBP) | |
1649 "pop %%"REG_BP" \n\t" | |
1650 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1651 | |
1652 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1653 "a" (&c->redDither) | |
1654 ); | |
1655 return; | |
1656 case IMGFMT_BGR16: | |
1657 asm volatile( | |
1658 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1659 "mov %4, %%"REG_b" \n\t" | |
1660 "push %%"REG_BP" \n\t" | |
1661 YSCALEYUV2RGB1b(%%REGBP, %5) | |
1662 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1663 #ifdef DITHER1XBPP | |
1664 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1665 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1666 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1667 #endif | |
1668 | |
1669 WRITEBGR16(%%REGb, 8280(%5), %%REGBP) | |
1670 "pop %%"REG_BP" \n\t" | |
1671 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1672 | |
1673 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1674 "a" (&c->redDither) | |
1675 ); | |
1676 return; | |
1677 case IMGFMT_YUY2: | |
1678 asm volatile( | |
1679 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1680 "mov %4, %%"REG_b" \n\t" | |
1681 "push %%"REG_BP" \n\t" | |
1682 YSCALEYUV2PACKED1b(%%REGBP, %5) | |
1683 WRITEYUY2(%%REGb, 8280(%5), %%REGBP) | |
1684 "pop %%"REG_BP" \n\t" | |
1685 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1686 | |
1687 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1688 "a" (&c->redDither) | |
1689 ); | |
1690 return; | |
1691 } | |
1692 } | |
1693 #endif | |
1694 if( uvalpha < 2048 ) | |
1695 { | |
1696 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C) | |
1697 }else{ | |
1698 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C) | |
1699 } | |
1700 } | |
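/* The uvalpha < 2048 split: below the midpoint the nearest chroma source
   is uvbuf0 alone, so the *1 macro variants read a single buffer and
   accept the half-pixel chroma shift noted above; otherwise the *1b
   variants average uvbuf0 and uvbuf1. */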
1701 | |
1702 //FIXME yuy2* can read up to 7 samples too many |
1703 | |
1704 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) | |
1705 { | |
1706 #ifdef HAVE_MMX | |
1707 asm volatile( | |
1708 "movq "MANGLE(bm01010101)", %%mm2\n\t" | |
1709 "mov %0, %%"REG_a" \n\t" | |
1710 "1: \n\t" | |
1711 "movq (%1, %%"REG_a",2), %%mm0 \n\t" | |
1712 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" | |
1713 "pand %%mm2, %%mm0 \n\t" | |
1714 "pand %%mm2, %%mm1 \n\t" | |
1715 "packuswb %%mm1, %%mm0 \n\t" | |
1716 "movq %%mm0, (%2, %%"REG_a") \n\t" | |
1717 "add $8, %%"REG_a" \n\t" | |
1718 " js 1b \n\t" | |
1719 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1720 : "%"REG_a | |
1721 ); | |
1722 #else | |
1723 int i; | |
1724 for(i=0; i<width; i++) | |
1725 dst[i]= src[2*i]; | |
1726 #endif | |
1727 } | |
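/* YUYV keeps luma at even byte offsets, so this is just dst[i] = src[2*i]
   (the C fallback). bm01010101 is a 0x00FF mask in each 16-bit lane;
   pand plus packuswb compact the 8 luma bytes out of 16 input bytes per
   iteration. */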
1728 | |
1729 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) | |
1730 { | |
1731 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1732 asm volatile( | |
1733 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1734 "mov %0, %%"REG_a" \n\t" | |
1735 "1: \n\t" | |
1736 "movq (%1, %%"REG_a",4), %%mm0 \n\t" | |
1737 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" | |
1738 "movq (%2, %%"REG_a",4), %%mm2 \n\t" | |
1739 "movq 8(%2, %%"REG_a",4), %%mm3 \n\t" | |
1740 PAVGB(%%mm2, %%mm0) | |
1741 PAVGB(%%mm3, %%mm1) | |
1742 "psrlw $8, %%mm0 \n\t" | |
1743 "psrlw $8, %%mm1 \n\t" | |
1744 "packuswb %%mm1, %%mm0 \n\t" | |
1745 "movq %%mm0, %%mm1 \n\t" | |
1746 "psrlw $8, %%mm0 \n\t" | |
1747 "pand %%mm4, %%mm1 \n\t" | |
1748 "packuswb %%mm0, %%mm0 \n\t" | |
1749 "packuswb %%mm1, %%mm1 \n\t" | |
1750 "movd %%mm0, (%4, %%"REG_a") \n\t" | |
1751 "movd %%mm1, (%3, %%"REG_a") \n\t" | |
1752 "add $4, %%"REG_a" \n\t" | |
1753 " js 1b \n\t" | |
1754 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1755 : "%"REG_a | |
1756 ); | |
1757 #else | |
1758 int i; | |
1759 for(i=0; i<width; i++) | |
1760 { | |
1761 dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1; | |
1762 dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1; | |
1763 } | |
1764 #endif | |
1765 } | |
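/* Chroma path: PAVGB takes a rounded byte average of the two source
   lines, psrlw $8 isolates the interleaved U/V bytes, and the final
   pack/mask pair splits U from V. Note the rounding difference: PAVGB
   computes (a+b+1)>>1 while the C fallback uses (a+b)>>1. */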
1766 | |
1767 //this is almost identical to the previous, and exists only because yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses |
1768 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) | |
1769 { | |
1770 #ifdef HAVE_MMX | |
1771 asm volatile( | |
1772 "mov %0, %%"REG_a" \n\t" | |
1773 "1: \n\t" | |
1774 "movq (%1, %%"REG_a",2), %%mm0 \n\t" | |
1775 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" | |
1776 "psrlw $8, %%mm0 \n\t" | |
1777 "psrlw $8, %%mm1 \n\t" | |
1778 "packuswb %%mm1, %%mm0 \n\t" | |
1779 "movq %%mm0, (%2, %%"REG_a") \n\t" | |
1780 "add $8, %%"REG_a" \n\t" | |
1781 " js 1b \n\t" | |
1782 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1783 : "%"REG_a | |
1784 ); | |
1785 #else | |
1786 int i; | |
1787 for(i=0; i<width; i++) | |
1788 dst[i]= src[2*i+1]; | |
1789 #endif | |
1790 } | |
1791 | |
1792 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) | |
1793 { | |
1794 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1795 asm volatile( | |
1796 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1797 "mov %0, %%"REG_a" \n\t" | |
1798 "1: \n\t" | |
1799 "movq (%1, %%"REG_a",4), %%mm0 \n\t" | |
1800 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" | |
1801 "movq (%2, %%"REG_a",4), %%mm2 \n\t" | |
1802 "movq 8(%2, %%"REG_a",4), %%mm3 \n\t" | |
1803 PAVGB(%%mm2, %%mm0) | |
1804 PAVGB(%%mm3, %%mm1) | |
1805 "pand %%mm4, %%mm0 \n\t" | |
1806 "pand %%mm4, %%mm1 \n\t" | |
1807 "packuswb %%mm1, %%mm0 \n\t" | |
1808 "movq %%mm0, %%mm1 \n\t" | |
1809 "psrlw $8, %%mm0 \n\t" | |
1810 "pand %%mm4, %%mm1 \n\t" | |
1811 "packuswb %%mm0, %%mm0 \n\t" | |
1812 "packuswb %%mm1, %%mm1 \n\t" | |
1813 "movd %%mm0, (%4, %%"REG_a") \n\t" | |
1814 "movd %%mm1, (%3, %%"REG_a") \n\t" | |
1815 "add $4, %%"REG_a" \n\t" | |
1816 " js 1b \n\t" | |
1817 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1818 : "%"REG_a | |
1819 ); | |
1820 #else | |
1821 int i; | |
1822 for(i=0; i<width; i++) | |
1823 { | |
1824 dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1; | |
1825 dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1; | |
1826 } | |
1827 #endif | |
1828 } | |
1829 | |
1830 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) | |
1831 { | |
1832 int i; | |
1833 for(i=0; i<width; i++) | |
1834 { | |
1835 int b= ((uint32_t*)src)[i]&0xFF; | |
1836 int g= (((uint32_t*)src)[i]>>8)&0xFF; | |
1837 int r= (((uint32_t*)src)[i]>>16)&0xFF; | |
1838 | |
1839 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
1840 } | |
1841 } | |
1842 | |
1843 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1844 { | |
1845 int i; | |
1846 for(i=0; i<width; i++) | |
1847 { | |
1848 const int a= ((uint32_t*)src1)[2*i+0]; | |
1849 const int e= ((uint32_t*)src1)[2*i+1]; | |
1850 const int c= ((uint32_t*)src2)[2*i+0]; | |
1851 const int d= ((uint32_t*)src2)[2*i+1]; | |
1852 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF); | |
1853 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00); | |
1854 const int b= l&0x3FF; | |
1855 const int g= h>>8; | |
1856 const int r= l>>16; | |
1857 | |
1858 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1859 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1860 } | |
1861 } | |
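/* SWAR trick above: masking with 0xFF00FF sums B and R of four pixels in
   one accumulator l (each channel sum is at most 4*255, fitting 10 bits,
   hence b = l&0x3FF and r = l>>16), while h sums G in the 0x00FF00 lane.
   This averages a 2x2 block for chroma subsampling; the +2 in the shift
   is the divide-by-4 folded into the fixed-point dot product. */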
1862 | |
1863 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) | |
1864 { | |
1865 #ifdef HAVE_MMX | |
1866 asm volatile( | |
1867 "mov %2, %%"REG_a" \n\t" | |
1868 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |
1869 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
1870 "pxor %%mm7, %%mm7 \n\t" | |
19396 | 1871 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t" |
1872 ASMALIGN(4) |
18861 | 1873 "1: \n\t" |
19396 | 1874 PREFETCH" 64(%0, %%"REG_d") \n\t" |
1875 "movd (%0, %%"REG_d"), %%mm0 \n\t" | |
1876 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" | |
18861 | 1877 "punpcklbw %%mm7, %%mm0 \n\t" |
1878 "punpcklbw %%mm7, %%mm1 \n\t" | |
19396 | 1879 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" |
1880 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1881 "punpcklbw %%mm7, %%mm2 \n\t" |
1882 "punpcklbw %%mm7, %%mm3 \n\t" | |
1883 "pmaddwd %%mm6, %%mm0 \n\t" | |
1884 "pmaddwd %%mm6, %%mm1 \n\t" | |
1885 "pmaddwd %%mm6, %%mm2 \n\t" | |
1886 "pmaddwd %%mm6, %%mm3 \n\t" | |
1887 #ifndef FAST_BGR2YV12 | |
1888 "psrad $8, %%mm0 \n\t" | |
1889 "psrad $8, %%mm1 \n\t" | |
1890 "psrad $8, %%mm2 \n\t" | |
1891 "psrad $8, %%mm3 \n\t" | |
1892 #endif | |
1893 "packssdw %%mm1, %%mm0 \n\t" | |
1894 "packssdw %%mm3, %%mm2 \n\t" | |
1895 "pmaddwd %%mm5, %%mm0 \n\t" | |
1896 "pmaddwd %%mm5, %%mm2 \n\t" | |
1897 "packssdw %%mm2, %%mm0 \n\t" | |
1898 "psraw $7, %%mm0 \n\t" | |
1899 | |
19396 | 1900 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
1901 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" | |
18861 | 1902 "punpcklbw %%mm7, %%mm4 \n\t" |
1903 "punpcklbw %%mm7, %%mm1 \n\t" | |
19396 | 1904 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" |
1905 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1906 "punpcklbw %%mm7, %%mm2 \n\t" |
1907 "punpcklbw %%mm7, %%mm3 \n\t" | |
1908 "pmaddwd %%mm6, %%mm4 \n\t" | |
1909 "pmaddwd %%mm6, %%mm1 \n\t" | |
1910 "pmaddwd %%mm6, %%mm2 \n\t" | |
1911 "pmaddwd %%mm6, %%mm3 \n\t" | |
1912 #ifndef FAST_BGR2YV12 | |
1913 "psrad $8, %%mm4 \n\t" | |
1914 "psrad $8, %%mm1 \n\t" | |
1915 "psrad $8, %%mm2 \n\t" | |
1916 "psrad $8, %%mm3 \n\t" | |
1917 #endif | |
1918 "packssdw %%mm1, %%mm4 \n\t" | |
1919 "packssdw %%mm3, %%mm2 \n\t" | |
1920 "pmaddwd %%mm5, %%mm4 \n\t" | |
1921 "pmaddwd %%mm5, %%mm2 \n\t" | |
19396 | 1922 "add $24, %%"REG_d" \n\t" |
18861 | 1923 "packssdw %%mm2, %%mm4 \n\t" |
1924 "psraw $7, %%mm4 \n\t" | |
1925 | |
1926 "packuswb %%mm4, %%mm0 \n\t" | |
1927 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |
1928 | |
1929 "movq %%mm0, (%1, %%"REG_a") \n\t" | |
1930 "add $8, %%"REG_a" \n\t" | |
1931 " js 1b \n\t" | |
1932 : : "r" (src+width*3), "r" (dst+width), "g" (-width) | |
19396 | 1933 : "%"REG_a, "%"REG_d |
18861 | 1934 ); |
1935 #else | |
1936 int i; | |
1937 for(i=0; i<width; i++) | |
1938 { | |
1939 int b= src[i*3+0]; | |
1940 int g= src[i*3+1]; | |
1941 int r= src[i*3+2]; | |
1942 | |
1943 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
1944 } | |
1945 #endif | |
1946 } | |
1947 | |
1948 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) | |
1949 { | |
1950 #ifdef HAVE_MMX | |
1951 asm volatile( | |
1952 "mov %4, %%"REG_a" \n\t" | |
1953 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
1954 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
1955 "pxor %%mm7, %%mm7 \n\t" | |
19396 | 1956 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" |
1957 "add %%"REG_d", %%"REG_d" \n\t" | |
1958 ASMALIGN(4) |
18861 | 1959 "1: \n\t" |
19396 | 1960 PREFETCH" 64(%0, %%"REG_d") \n\t" |
1961 PREFETCH" 64(%1, %%"REG_d") \n\t" | |
18861 | 1962 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
19396 | 1963 "movq (%0, %%"REG_d"), %%mm0 \n\t" |
1964 "movq (%1, %%"REG_d"), %%mm1 \n\t" | |
1965 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" | |
1966 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1967 PAVGB(%%mm1, %%mm0) |
1968 PAVGB(%%mm3, %%mm2) | |
1969 "movq %%mm0, %%mm1 \n\t" | |
1970 "movq %%mm2, %%mm3 \n\t" | |
1971 "psrlq $24, %%mm0 \n\t" | |
1972 "psrlq $24, %%mm2 \n\t" | |
1973 PAVGB(%%mm1, %%mm0) | |
1974 PAVGB(%%mm3, %%mm2) | |
1975 "punpcklbw %%mm7, %%mm0 \n\t" | |
1976 "punpcklbw %%mm7, %%mm2 \n\t" | |
1977 #else | |
19396 | 1978 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
1979 "movd (%1, %%"REG_d"), %%mm1 \n\t" | |
1980 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" | |
1981 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1982 "punpcklbw %%mm7, %%mm0 \n\t" |
1983 "punpcklbw %%mm7, %%mm1 \n\t" | |
1984 "punpcklbw %%mm7, %%mm2 \n\t" | |
1985 "punpcklbw %%mm7, %%mm3 \n\t" | |
1986 "paddw %%mm1, %%mm0 \n\t" | |
1987 "paddw %%mm3, %%mm2 \n\t" | |
1988 "paddw %%mm2, %%mm0 \n\t" | |
19396 | 1989 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" |
1990 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" | |
1991 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" | |
1992 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1993 "punpcklbw %%mm7, %%mm4 \n\t" |
1994 "punpcklbw %%mm7, %%mm1 \n\t" | |
1995 "punpcklbw %%mm7, %%mm2 \n\t" | |
1996 "punpcklbw %%mm7, %%mm3 \n\t" | |
1997 "paddw %%mm1, %%mm4 \n\t" | |
1998 "paddw %%mm3, %%mm2 \n\t" | |
1999 "paddw %%mm4, %%mm2 \n\t" | |
2000 "psrlw $2, %%mm0 \n\t" | |
2001 "psrlw $2, %%mm2 \n\t" | |
2002 #endif | |
2003 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |
2004 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
2005 | |
2006 "pmaddwd %%mm0, %%mm1 \n\t" | |
2007 "pmaddwd %%mm2, %%mm3 \n\t" | |
2008 "pmaddwd %%mm6, %%mm0 \n\t" | |
2009 "pmaddwd %%mm6, %%mm2 \n\t" | |
2010 #ifndef FAST_BGR2YV12 | |
2011 "psrad $8, %%mm0 \n\t" | |
2012 "psrad $8, %%mm1 \n\t" | |
2013 "psrad $8, %%mm2 \n\t" | |
2014 "psrad $8, %%mm3 \n\t" | |
2015 #endif | |
2016 "packssdw %%mm2, %%mm0 \n\t" | |
2017 "packssdw %%mm3, %%mm1 \n\t" | |
2018 "pmaddwd %%mm5, %%mm0 \n\t" | |
2019 "pmaddwd %%mm5, %%mm1 \n\t" | |
2020 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
2021 "psraw $7, %%mm0 \n\t" | |
2022 | |
2023 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
19396 | 2024 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
2025 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2026 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2027 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2028 PAVGB(%%mm1, %%mm4) |
2029 PAVGB(%%mm3, %%mm2) | |
2030 "movq %%mm4, %%mm1 \n\t" | |
2031 "movq %%mm2, %%mm3 \n\t" | |
2032 "psrlq $24, %%mm4 \n\t" | |
2033 "psrlq $24, %%mm2 \n\t" | |
2034 PAVGB(%%mm1, %%mm4) | |
2035 PAVGB(%%mm3, %%mm2) | |
2036 "punpcklbw %%mm7, %%mm4 \n\t" | |
2037 "punpcklbw %%mm7, %%mm2 \n\t" | |
2038 #else | |
19396 | 2039 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2040 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2041 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" | |
2042 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2043 "punpcklbw %%mm7, %%mm4 \n\t" |
2044 "punpcklbw %%mm7, %%mm1 \n\t" | |
2045 "punpcklbw %%mm7, %%mm2 \n\t" | |
2046 "punpcklbw %%mm7, %%mm3 \n\t" | |
2047 "paddw %%mm1, %%mm4 \n\t" | |
2048 "paddw %%mm3, %%mm2 \n\t" | |
2049 "paddw %%mm2, %%mm4 \n\t" | |
19396 | 2050 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" |
2051 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" | |
2052 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" | |
2053 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2054 "punpcklbw %%mm7, %%mm5 \n\t" |
2055 "punpcklbw %%mm7, %%mm1 \n\t" | |
2056 "punpcklbw %%mm7, %%mm2 \n\t" | |
2057 "punpcklbw %%mm7, %%mm3 \n\t" | |
2058 "paddw %%mm1, %%mm5 \n\t" | |
2059 "paddw %%mm3, %%mm2 \n\t" | |
2060 "paddw %%mm5, %%mm2 \n\t" | |
2061 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
2062 "psrlw $2, %%mm4 \n\t" | |
2063 "psrlw $2, %%mm2 \n\t" | |
2064 #endif | |
2065 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |
2066 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
2067 | |
2068 "pmaddwd %%mm4, %%mm1 \n\t" | |
2069 "pmaddwd %%mm2, %%mm3 \n\t" | |
2070 "pmaddwd %%mm6, %%mm4 \n\t" | |
2071 "pmaddwd %%mm6, %%mm2 \n\t" | |
2072 #ifndef FAST_BGR2YV12 | |
2073 "psrad $8, %%mm4 \n\t" | |
2074 "psrad $8, %%mm1 \n\t" | |
2075 "psrad $8, %%mm2 \n\t" | |
2076 "psrad $8, %%mm3 \n\t" | |
2077 #endif | |
2078 "packssdw %%mm2, %%mm4 \n\t" | |
2079 "packssdw %%mm3, %%mm1 \n\t" | |
2080 "pmaddwd %%mm5, %%mm4 \n\t" | |
2081 "pmaddwd %%mm5, %%mm1 \n\t" | |
19396 | 2082 "add $24, %%"REG_d" \n\t" |
18861 | 2083 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 |
2084 "psraw $7, %%mm4 \n\t" | |
2085 | |
2086 "movq %%mm0, %%mm1 \n\t" | |
2087 "punpckldq %%mm4, %%mm0 \n\t" | |
2088 "punpckhdq %%mm4, %%mm1 \n\t" | |
2089 "packsswb %%mm1, %%mm0 \n\t" | |
2090 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |
2091 | |
2092 "movd %%mm0, (%2, %%"REG_a") \n\t" | |
2093 "punpckhdq %%mm0, %%mm0 \n\t" | |
2094 "movd %%mm0, (%3, %%"REG_a") \n\t" | |
2095 "add $4, %%"REG_a" \n\t" | |
2096 " js 1b \n\t" | |
2097 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) | |
19396 | 2098 : "%"REG_a, "%"REG_d |
18861 | 2099 ); |
2100 #else | |
2101 int i; | |
2102 for(i=0; i<width; i++) | |
2103 { | |
2104 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2105 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2106 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2107 | |
2108 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2109 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2110 } | |
2111 #endif | |
2112 } | |
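/* As in the other *ToUV routines, chroma comes from a 2x2 block: the C
   fallback sums two horizontal neighbours on each of two lines and folds
   the /4 into the +2 of the shift, while the MMX paths average up front
   (PAVGB twice with MMX2/3DNow, add plus psrlw $2 otherwise). */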
2113 | |
2114 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) | |
2115 { | |
2116 int i; | |
2117 for(i=0; i<width; i++) | |
2118 { | |
2119 int d= ((uint16_t*)src)[i]; | |
2120 int b= d&0x1F; | |
2121 int g= (d>>5)&0x3F; | |
2122 int r= (d>>11)&0x1F; | |
2123 | |
2124 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; | |
2125 } | |
2126 } | |
2127 | |
2128 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2129 { | |
2130 int i; | |
2131 for(i=0; i<width; i++) | |
2132 { | |
2133 int d0= ((uint32_t*)src1)[i]; | |
2134 int d1= ((uint32_t*)src2)[i]; | |
2135 | |
2136 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F); | |
2137 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F); | |
2138 | |
2139 int dh2= (dh>>11) + (dh<<21); | |
2140 int d= dh2 + dl; | |
2141 | |
2142 int b= d&0x7F; | |
2143 int r= (d>>11)&0x7F; | |
2144 int g= d>>21; | |
2145 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
2146 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
2147 } | |
2148 } | |
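/* 16bpp SWAR variant of the same 2x2 averaging: each uint32_t load holds
   two RGB565 pixels, and the 0x07E0F81F / 0x07C0F83F masks give every
   channel a spare carry bit so four pixels can be summed without
   cross-channel overflow. The doubled R/B coefficients and the -2 in the
   shift rescale the 5-bit (vs 6-bit G) channel sums back to 8-bit range. */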
2149 | |
2150 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) | |
2151 { | |
2152 int i; | |
2153 for(i=0; i<width; i++) | |
2154 { | |
2155 int d= ((uint16_t*)src)[i]; | |
2156 int b= d&0x1F; | |
2157 int g= (d>>5)&0x1F; | |
2158 int r= (d>>10)&0x1F; | |
2159 | |
2160 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; | |
2161 } | |
2162 } | |
2163 | |
2164 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2165 { | |
2166 int i; | |
2167 for(i=0; i<width; i++) | |
2168 { | |
2169 int d0= ((uint32_t*)src1)[i]; | |
2170 int d1= ((uint32_t*)src2)[i]; | |
2171 | |
2172 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F); | |
2173 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F); | |
2174 | |
2175 int dh2= (dh>>11) + (dh<<21); | |
2176 int d= dh2 + dl; | |
2177 | |
2178 int b= d&0x7F; | |
2179 int r= (d>>10)&0x7F; | |
2180 int g= d>>21; | |
2181 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
2182 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
2183 } | |
2184 } | |
2185 | |
2186 | |
2187 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) | |
2188 { | |
2189 int i; | |
2190 for(i=0; i<width; i++) | |
2191 { | |
2192 int r= ((uint32_t*)src)[i]&0xFF; | |
2193 int g= (((uint32_t*)src)[i]>>8)&0xFF; | |
2194 int b= (((uint32_t*)src)[i]>>16)&0xFF; | |
2195 | |
2196 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
2197 } | |
2198 } | |
2199 | |
2200 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2201 { | |
2202 int i; | |
2203 for(i=0; i<width; i++) | |
2204 { | |
2205 const int a= ((uint32_t*)src1)[2*i+0]; | |
2206 const int e= ((uint32_t*)src1)[2*i+1]; | |
2207 const int c= ((uint32_t*)src2)[2*i+0]; | |
2208 const int d= ((uint32_t*)src2)[2*i+1]; | |
2209 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF); | |
2210 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00); | |
2211 const int r= l&0x3FF; | |
2212 const int g= h>>8; | |
2213 const int b= l>>16; | |
2214 | |
2215 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2216 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2217 } | |
2218 } | |
2219 | |
2220 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) | |
2221 { | |
2222 int i; | |
2223 for(i=0; i<width; i++) | |
2224 { | |
2225 int r= src[i*3+0]; | |
2226 int g= src[i*3+1]; | |
2227 int b= src[i*3+2]; | |
2228 | |
2229 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
2230 } | |
2231 } | |
2232 | |
2233 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2234 { | |
2235 int i; | |
2236 for(i=0; i<width; i++) | |
2237 { | |
2238 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2239 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2240 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2241 | |
2242 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2243 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2244 } | |
2245 } | |
2246 | |
2247 | |
2248 // Bilinear / Bicubic scaling | |
2249 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |
2250 int16_t *filter, int16_t *filterPos, long filterSize) | |
2251 { | |
2252 #ifdef HAVE_MMX | |
2253 assert(filterSize % 4 == 0 && filterSize>0); | |
2254 if(filterSize==4) // always true for upscaling, sometimes for downscaling too | |
2255 { | |
2256 long counter= -2*dstW; | |
2257 filter-= counter*2; | |
2258 filterPos-= counter/2; | |
2259 dst-= counter/2; | |
2260 asm volatile( | |
19396 | 2261 #if defined(PIC) |
2262 "push %%"REG_b" \n\t" | |
2263 #endif | |
18861 | 2264 "pxor %%mm7, %%mm7 \n\t" |
2265 "movq "MANGLE(w02)", %%mm6 \n\t" | |
2266 "push %%"REG_BP" \n\t" // we use 7 regs here ... | |
2267 "mov %%"REG_a", %%"REG_BP" \n\t" | |
19372 | 2268 ASMALIGN(4) |
18861 | 2269 "1: \n\t" |
2270 "movzwl (%2, %%"REG_BP"), %%eax \n\t" | |
2271 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" | |
2272 "movq (%1, %%"REG_BP", 4), %%mm1\n\t" | |
2273 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t" | |
2274 "movd (%3, %%"REG_a"), %%mm0 \n\t" | |
2275 "movd (%3, %%"REG_b"), %%mm2 \n\t" | |
2276 "punpcklbw %%mm7, %%mm0 \n\t" | |
2277 "punpcklbw %%mm7, %%mm2 \n\t" | |
2278 "pmaddwd %%mm1, %%mm0 \n\t" | |
2279 "pmaddwd %%mm2, %%mm3 \n\t" | |
2280 "psrad $8, %%mm0 \n\t" | |
2281 "psrad $8, %%mm3 \n\t" | |
2282 "packssdw %%mm3, %%mm0 \n\t" | |
2283 "pmaddwd %%mm6, %%mm0 \n\t" | |
2284 "packssdw %%mm0, %%mm0 \n\t" | |
2285 "movd %%mm0, (%4, %%"REG_BP") \n\t" | |
2286 "add $4, %%"REG_BP" \n\t" | |
2287 " jnc 1b \n\t" | |
2288 | |
2289 "pop %%"REG_BP" \n\t" | |
19396 | 2290 #if defined(PIC) |
2291 "pop %%"REG_b" \n\t" | |
2292 #endif | |
18861 | 2293 : "+a" (counter) |
2294 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
19396 | 2295 #if !defined(PIC) |
18861 | 2296 : "%"REG_b |
19396 | 2297 #endif |
18861 | 2298 ); |
2299 } | |
2300 else if(filterSize==8) | |
2301 { | |
2302 long counter= -2*dstW; | |
2303 filter-= counter*4; | |
2304 filterPos-= counter/2; | |
2305 dst-= counter/2; | |
2306 asm volatile( | |
19396 | 2307 #if defined(PIC) |
2308 "push %%"REG_b" \n\t" | |
2309 #endif | |
18861 | 2310 "pxor %%mm7, %%mm7 \n\t" |
2311 "movq "MANGLE(w02)", %%mm6 \n\t" | |
2312 "push %%"REG_BP" \n\t" // we use 7 regs here ... | |
2313 "mov %%"REG_a", %%"REG_BP" \n\t" | |
19372 | 2314 ASMALIGN(4) |
18861 | 2315 "1: \n\t" |
2316 "movzwl (%2, %%"REG_BP"), %%eax \n\t" | |
2317 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" | |
2318 "movq (%1, %%"REG_BP", 8), %%mm1\n\t" | |
2319 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t" | |
2320 "movd (%3, %%"REG_a"), %%mm0 \n\t" | |
2321 "movd (%3, %%"REG_b"), %%mm2 \n\t" | |
2322 "punpcklbw %%mm7, %%mm0 \n\t" | |
2323 "punpcklbw %%mm7, %%mm2 \n\t" | |
2324 "pmaddwd %%mm1, %%mm0 \n\t" | |
2325 "pmaddwd %%mm2, %%mm3 \n\t" | |
2326 | |
2327 "movq 8(%1, %%"REG_BP", 8), %%mm1\n\t" | |
2328 "movq 24(%1, %%"REG_BP", 8), %%mm5\n\t" | |
2329 "movd 4(%3, %%"REG_a"), %%mm4 \n\t" | |
2330 "movd 4(%3, %%"REG_b"), %%mm2 \n\t" | |
2331 "punpcklbw %%mm7, %%mm4 \n\t" | |
2332 "punpcklbw %%mm7, %%mm2 \n\t" | |
2333 "pmaddwd %%mm1, %%mm4 \n\t" | |
2334 "pmaddwd %%mm2, %%mm5 \n\t" | |
2335 "paddd %%mm4, %%mm0 \n\t" | |
2336 "paddd %%mm5, %%mm3 \n\t" | |
2337 | |
2338 "psrad $8, %%mm0 \n\t" | |
2339 "psrad $8, %%mm3 \n\t" | |
2340 "packssdw %%mm3, %%mm0 \n\t" | |
2341 "pmaddwd %%mm6, %%mm0 \n\t" | |
2342 "packssdw %%mm0, %%mm0 \n\t" | |
2343 "movd %%mm0, (%4, %%"REG_BP") \n\t" | |
2344 "add $4, %%"REG_BP" \n\t" | |
2345 " jnc 1b \n\t" | |
2346 | |
2347 "pop %%"REG_BP" \n\t" | |
19396 | 2348 #if defined(PIC) |
2349 "pop %%"REG_b" \n\t" | |
2350 #endif | |
18861 | 2351 : "+a" (counter) |
2352 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
19396 | 2353 #if !defined(PIC) |
18861 | 2354 : "%"REG_b |
19396 | 2355 #endif |
18861 | 2356 ); |
2357 } | |
2358 else | |
2359 { | |
2360 uint8_t *offset = src+filterSize; | |
2361 long counter= -2*dstW; | |
2362 // filter-= counter*filterSize/2; | |
2363 filterPos-= counter/2; | |
2364 dst-= counter/2; | |
2365 asm volatile( | |
2366 "pxor %%mm7, %%mm7 \n\t" | |
2367 "movq "MANGLE(w02)", %%mm6 \n\t" | |
19372 | 2368 ASMALIGN(4) |
18861 | 2369 "1: \n\t" |
2370 "mov %2, %%"REG_c" \n\t" | |
2371 "movzwl (%%"REG_c", %0), %%eax \n\t" | |
19396 | 2372 "movzwl 2(%%"REG_c", %0), %%edx \n\t" |
18861 | 2373 "mov %5, %%"REG_c" \n\t" |
2374 "pxor %%mm4, %%mm4 \n\t" | |
2375 "pxor %%mm5, %%mm5 \n\t" | |
2376 "2: \n\t" | |
2377 "movq (%1), %%mm1 \n\t" | |
2378 "movq (%1, %6), %%mm3 \n\t" | |
2379 "movd (%%"REG_c", %%"REG_a"), %%mm0\n\t" | |
19396 | 2380 "movd (%%"REG_c", %%"REG_d"), %%mm2\n\t" |
18861 | 2381 "punpcklbw %%mm7, %%mm0 \n\t" |
2382 "punpcklbw %%mm7, %%mm2 \n\t" | |
2383 "pmaddwd %%mm1, %%mm0 \n\t" | |
2384 "pmaddwd %%mm2, %%mm3 \n\t" | |
2385 "paddd %%mm3, %%mm5 \n\t" | |
2386 "paddd %%mm0, %%mm4 \n\t" | |
2387 "add $8, %1 \n\t" | |
2388 "add $4, %%"REG_c" \n\t" | |
2389 "cmp %4, %%"REG_c" \n\t" | |
2390 " jb 2b \n\t" | |
2391 "add %6, %1 \n\t" | |
2392 "psrad $8, %%mm4 \n\t" | |
2393 "psrad $8, %%mm5 \n\t" | |
2394 "packssdw %%mm5, %%mm4 \n\t" | |
2395 "pmaddwd %%mm6, %%mm4 \n\t" | |
2396 "packssdw %%mm4, %%mm4 \n\t" | |
2397 "mov %3, %%"REG_a" \n\t" | |
2398 "movd %%mm4, (%%"REG_a", %0) \n\t" | |
2399 "add $4, %0 \n\t" | |
2400 " jnc 1b \n\t" | |
2401 | |
2402 : "+r" (counter), "+r" (filter) | |
2403 : "m" (filterPos), "m" (dst), "m"(offset), | |
2404 "m" (src), "r" (filterSize*2) | |
19396 | 2405 : "%"REG_a, "%"REG_c, "%"REG_d |
18861 | 2406 ); |
2407 } | |
2408 #else | |
2409 #ifdef HAVE_ALTIVEC | |
2410 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); | |
2411 #else | |
2412 int i; | |
2413 for(i=0; i<dstW; i++) | |
2414 { | |
2415 int j; | |
2416 int srcPos= filterPos[i]; | |
2417 int val=0; | |
2418 // printf("filterPos: %d\n", filterPos[i]); | |
2419 for(j=0; j<filterSize; j++) | |
2420 { | |
2421 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); | |
2422 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; | |
2423 } | |
2424 // filter += hFilterSize; | |
19181 | 2425 dst[i] = FFMIN(FFMAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ... |
18861 | 2426 // dst[i] = val>>7; |
2427 } | |
2428 #endif | |
2429 #endif | |
2430 } | |
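/* Illustrative usage sketch, not part of the original file: the filter bank
   holds filterSize coefficients per output sample and filterPos[i] names the
   leftmost source pixel of each window; coefficients sum to 1<<14 so each
   output lands at src<<7, the scale the vertical scaler expects. A
   hypothetical 4-tap box filter (filterSize must be a multiple of 4 for the
   MMX path) halving an 8-pixel line: */
#if 0
{
	uint8_t  src[8]      = {0, 16, 32, 48, 64, 80, 96, 112};
	int16_t  dst[3];
	int16_t  filter[12]  = {8192,8192,0,0, 8192,8192,0,0, 8192,8192,0,0};
	int16_t  filterPos[3]= {0, 2, 4};
	RENAME(hScale)(dst, 3, src, 8, 0 /* xInc is unused by the filter paths */,
	               filter, filterPos, 4);
	// dst[] == {8*128, 40*128, 72*128}, i.e. the pair averages scaled by 128
}
#endif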
2431 // *** horizontal scale Y line to temp buffer | |
2432 static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, | |
2433 int flags, int canMMX2BeUsed, int16_t *hLumFilter, | |
2434 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, | |
2435 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, | |
2436 int32_t *mmx2FilterPos) | |
2437 { | |
2438 if(srcFormat==IMGFMT_YUY2) | |
2439 { | |
2440 RENAME(yuy2ToY)(formatConvBuffer, src, srcW); | |
2441 src= formatConvBuffer; | |
2442 } | |
2443 else if(srcFormat==IMGFMT_UYVY) | |
2444 { | |
2445 RENAME(uyvyToY)(formatConvBuffer, src, srcW); | |
2446 src= formatConvBuffer; | |
2447 } | |
2448 else if(srcFormat==IMGFMT_BGR32) | |
2449 { | |
2450 RENAME(bgr32ToY)(formatConvBuffer, src, srcW); | |
2451 src= formatConvBuffer; | |
2452 } | |
2453 else if(srcFormat==IMGFMT_BGR24) | |
2454 { | |
2455 RENAME(bgr24ToY)(formatConvBuffer, src, srcW); | |
2456 src= formatConvBuffer; | |
2457 } | |
2458 else if(srcFormat==IMGFMT_BGR16) | |
2459 { | |
2460 RENAME(bgr16ToY)(formatConvBuffer, src, srcW); | |
2461 src= formatConvBuffer; | |
2462 } | |
2463 else if(srcFormat==IMGFMT_BGR15) | |
2464 { | |
2465 RENAME(bgr15ToY)(formatConvBuffer, src, srcW); | |
2466 src= formatConvBuffer; | |
2467 } | |
2468 else if(srcFormat==IMGFMT_RGB32) | |
2469 { | |
2470 RENAME(rgb32ToY)(formatConvBuffer, src, srcW); | |
2471 src= formatConvBuffer; | |
2472 } | |
2473 else if(srcFormat==IMGFMT_RGB24) | |
2474 { | |
2475 RENAME(rgb24ToY)(formatConvBuffer, src, srcW); | |
2476 src= formatConvBuffer; | |
2477 } | |
2478 | |
2479 #ifdef HAVE_MMX | |
2480 // use the new MMX scaler if MMX2 can't be used (it's faster than the x86 asm one) | |
2481 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) | |
2482 #else | |
2483 if(!(flags&SWS_FAST_BILINEAR)) | |
2484 #endif | |
2485 { | |
2486 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |
2487 } | |
2488 else // Fast Bilinear upscale / crap downscale | |
2489 { | |
2490 #if defined(ARCH_X86) || defined(ARCH_X86_64) | |
2491 #ifdef HAVE_MMX2 | |
2492 int i; | |
19396 | 2493 #if defined(PIC) |
2494 uint64_t ebxsave __attribute__((aligned(8))); | |
2495 #endif | |
18861 | 2496 if(canMMX2BeUsed) |
2497 { | |
2498 asm volatile( | |
19396 | 2499 #if defined(PIC) |
2500 "mov %%"REG_b", %5 \n\t" | |
2501 #endif | |
18861 | 2502 "pxor %%mm7, %%mm7 \n\t" |
2503 "mov %0, %%"REG_c" \n\t" | |
2504 "mov %1, %%"REG_D" \n\t" | |
2505 "mov %2, %%"REG_d" \n\t" | |
2506 "mov %3, %%"REG_b" \n\t" | |
2507 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
2508 PREFETCH" (%%"REG_c") \n\t" | |
2509 PREFETCH" 32(%%"REG_c") \n\t" | |
2510 PREFETCH" 64(%%"REG_c") \n\t" | |
2511 | |
2512 #ifdef ARCH_X86_64 | |
2513 | |
2514 #define FUNNY_Y_CODE \ | |
2515 "movl (%%"REG_b"), %%esi \n\t"\ | |
2516 "call *%4 \n\t"\ | |
2517 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\ | |
2518 "add %%"REG_S", %%"REG_c" \n\t"\ | |
2519 "add %%"REG_a", %%"REG_D" \n\t"\ | |
2520 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
2521 | |
2522 #else | |
2523 | |
2524 #define FUNNY_Y_CODE \ | |
2525 "movl (%%"REG_b"), %%esi \n\t"\ | |
2526 "call *%4 \n\t"\ | |
2527 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\ | |
2528 "add %%"REG_a", %%"REG_D" \n\t"\ | |
2529 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
2530 | |
2531 #endif | |
2532 | |
2533 FUNNY_Y_CODE | |
2534 FUNNY_Y_CODE | |
2535 FUNNY_Y_CODE | |
2536 FUNNY_Y_CODE | |
2537 FUNNY_Y_CODE | |
2538 FUNNY_Y_CODE | |
2539 FUNNY_Y_CODE | |
2540 FUNNY_Y_CODE | |
2541 | |
19396 | 2542 #if defined(PIC) |
2543 "mov %5, %%"REG_b" \n\t" | |
2544 #endif | |
18861 | 2545 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2546 "m" (funnyYCode) | |
19396 | 2547 #if defined(PIC) |
2548 ,"m" (ebxsave) | |
2549 #endif | |
2550 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D | |
2551 #if !defined(PIC) | |
2552 ,"%"REG_b | |
2553 #endif | |
18861 | 2554 ); |
2555 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; | |
2556 } | |
2557 else | |
2558 { | |
2559 #endif | |
2560 long xInc_shr16 = xInc >> 16; | |
2561 uint16_t xInc_mask = xInc & 0xffff; | |
2562 //NO MMX just normal asm ... | |
2563 asm volatile( | |
2564 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
19396 | 2565 "xor %%"REG_d", %%"REG_d" \n\t" // xx |
18861 | 2566 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
19372 | 2567 ASMALIGN(4) |
18861 | 2568 "1: \n\t" |
19396 | 2569 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] |
2570 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] | |
18861 | 2571 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2572 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2573 "shll $16, %%edi \n\t" | |
2574 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2575 "mov %1, %%"REG_D" \n\t" | |
2576 "shrl $9, %%esi \n\t" | |
2577 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t" | |
2578 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
19396 | 2579 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry |
18861 | 2580 |
19396 | 2581 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] |
2582 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] | |
18861 | 2583 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2584 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2585 "shll $16, %%edi \n\t" | |
2586 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2587 "mov %1, %%"REG_D" \n\t" | |
2588 "shrl $9, %%esi \n\t" | |
2589 "movw %%si, 2(%%"REG_D", %%"REG_a", 2)\n\t" | |
2590 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
19396 | 2591 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry |
18861 | 2592 |
2593 | |
2594 "add $2, %%"REG_a" \n\t" | |
2595 "cmp %2, %%"REG_a" \n\t" | |
2596 " jb 1b \n\t" | |
2597 | |
2598 | |
2599 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask) | |
19396 | 2600 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" |
18861 | 2601 ); |
2602 #ifdef HAVE_MMX2 | |
2603 } //if MMX2 can't be used | |
2604 #endif | |
2605 #else | |
2606 int i; | |
2607 unsigned int xpos=0; | |
2608 for(i=0;i<dstWidth;i++) | |
2609 { | |
2610 register unsigned int xx=xpos>>16; | |
2611 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2612 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; | |
2613 xpos+=xInc; | |
2614 } | |
2615 #endif | |
2616 } | |
2617 } | |
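/* Illustrative note, not part of the original file: the fast bilinear path
   steps through the source in 16.16 fixed point with xInc ~ (srcW<<16)/dstW;
   the asm keeps the integer part in a register advanced by the carry of a
   16-bit add ("adc"), which the C fallback spells as xpos>>16. A hypothetical
   worked step for a 2x upscale: */
#if 0
{
	unsigned xInc  = (100<<16)/200;     // 0x8000: half a source pixel per output pixel
	unsigned xpos  = 3*xInc;            // position of output pixel 3
	unsigned xx    = xpos>>16;          // source index: 1
	unsigned xalpha= (xpos&0xFFFF)>>9;  // 7 bit blend weight: 64, i.e. 1/2
	// dst= (src[1]<<7) + (src[2]-src[1])*64: halfway between the two, scaled by 128
}
#endif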
2618 | |
2619 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2, | |
2620 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, | |
2621 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, | |
2622 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, | |
2623 int32_t *mmx2FilterPos) | |
2624 { | |
2625 if(srcFormat==IMGFMT_YUY2) | |
2626 { | |
2627 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2628 src1= formatConvBuffer; | |
2629 src2= formatConvBuffer+2048; | |
2630 } | |
2631 else if(srcFormat==IMGFMT_UYVY) | |
2632 { | |
2633 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2634 src1= formatConvBuffer; | |
2635 src2= formatConvBuffer+2048; | |
2636 } | |
2637 else if(srcFormat==IMGFMT_BGR32) | |
2638 { | |
2639 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2640 src1= formatConvBuffer; | |
2641 src2= formatConvBuffer+2048; | |
2642 } | |
2643 else if(srcFormat==IMGFMT_BGR24) | |
2644 { | |
2645 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2646 src1= formatConvBuffer; | |
2647 src2= formatConvBuffer+2048; | |
2648 } | |
2649 else if(srcFormat==IMGFMT_BGR16) | |
2650 { | |
2651 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2652 src1= formatConvBuffer; | |
2653 src2= formatConvBuffer+2048; | |
2654 } | |
2655 else if(srcFormat==IMGFMT_BGR15) | |
2656 { | |
2657 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2658 src1= formatConvBuffer; | |
2659 src2= formatConvBuffer+2048; | |
2660 } | |
2661 else if(srcFormat==IMGFMT_RGB32) | |
2662 { | |
2663 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2664 src1= formatConvBuffer; | |
2665 src2= formatConvBuffer+2048; | |
2666 } | |
2667 else if(srcFormat==IMGFMT_RGB24) | |
2668 { | |
2669 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2670 src1= formatConvBuffer; | |
2671 src2= formatConvBuffer+2048; | |
2672 } | |
2673 else if(isGray(srcFormat)) | |
2674 { | |
2675 return; | |
2676 } | |
2677 | |
2678 #ifdef HAVE_MMX | |
2679 // use the new MMX scaler if MMX2 can't be used (it's faster than the x86 asm one) | |
2680 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) | |
2681 #else | |
2682 if(!(flags&SWS_FAST_BILINEAR)) | |
2683 #endif | |
2684 { | |
2685 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2686 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2687 } | |
2688 else // Fast Bilinear upscale / crap downscale | |
2689 { | |
2690 #if defined(ARCH_X86) || defined(ARCH_X86_64) | |
2691 #ifdef HAVE_MMX2 | |
2692 int i; | |
19396 | 2693 #if defined(PIC) |
2694 uint64_t ebxsave __attribute__((aligned(8))); | |
2695 #endif | |
18861 | 2696 if(canMMX2BeUsed) |
2697 { | |
2698 asm volatile( | |
19396 | 2699 #if defined(PIC) |
2700 "mov %%"REG_b", %6 \n\t" | |
2701 #endif | |
18861 | 2702 "pxor %%mm7, %%mm7 \n\t" |
2703 "mov %0, %%"REG_c" \n\t" | |
2704 "mov %1, %%"REG_D" \n\t" | |
2705 "mov %2, %%"REG_d" \n\t" | |
2706 "mov %3, %%"REG_b" \n\t" | |
2707 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
2708 PREFETCH" (%%"REG_c") \n\t" | |
2709 PREFETCH" 32(%%"REG_c") \n\t" | |
2710 PREFETCH" 64(%%"REG_c") \n\t" | |
2711 | |
2712 #ifdef ARCH_X86_64 | |
2713 | |
2714 #define FUNNY_UV_CODE \ | |
2715 "movl (%%"REG_b"), %%esi \n\t"\ | |
2716 "call *%4 \n\t"\ | |
2717 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\ | |
2718 "add %%"REG_S", %%"REG_c" \n\t"\ | |
2719 "add %%"REG_a", %%"REG_D" \n\t"\ | |
2720 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
2721 | |
2722 #else | |
2723 | |
2724 #define FUNNY_UV_CODE \ | |
2725 "movl (%%"REG_b"), %%esi \n\t"\ | |
2726 "call *%4 \n\t"\ | |
2727 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\ | |
2728 "add %%"REG_a", %%"REG_D" \n\t"\ | |
2729 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
2730 | |
2731 #endif | |
2732 | |
2733 FUNNY_UV_CODE | |
2734 FUNNY_UV_CODE | |
2735 FUNNY_UV_CODE | |
2736 FUNNY_UV_CODE | |
2737 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
2738 "mov %5, %%"REG_c" \n\t" // src | |
2739 "mov %1, %%"REG_D" \n\t" // buf1 | |
2740 "add $4096, %%"REG_D" \n\t" | |
2741 PREFETCH" (%%"REG_c") \n\t" | |
2742 PREFETCH" 32(%%"REG_c") \n\t" | |
2743 PREFETCH" 64(%%"REG_c") \n\t" | |
2744 | |
2745 FUNNY_UV_CODE | |
2746 FUNNY_UV_CODE | |
2747 FUNNY_UV_CODE | |
2748 FUNNY_UV_CODE | |
2749 | |
19396 | 2750 #if defined(PIC) |
2751 "mov %6, %%"REG_b" \n\t" | |
2752 #endif | |
18861 | 2753 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2754 "m" (funnyUVCode), "m" (src2) | |
19396 | 2755 #if defined(PIC) |
2756 ,"m" (ebxsave) | |
2757 #endif | |
19400 | 2758 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D |
19396 | 2759 #if !defined(PIC) |
2760 ,"%"REG_b | |
2761 #endif | |
18861 | 2762 ); |
2763 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) | |
2764 { | |
2765 // printf("%d %d %d\n", dstWidth, i, srcW); | |
2766 dst[i] = src1[srcW-1]*128; | |
2767 dst[i+2048] = src2[srcW-1]*128; | |
2768 } | |
2769 } | |
2770 else | |
2771 { | |
2772 #endif | |
2773 long xInc_shr16 = (long) (xInc >> 16); | |
2774 uint16_t xInc_mask = xInc & 0xffff; | |
2775 asm volatile( | |
2776 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
19396 | 2777 "xor %%"REG_d", %%"REG_d" \n\t" // xx |
18861 | 2778 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
19372 | 2779 ASMALIGN(4) |
18861 | 2780 "1: \n\t" |
2781 "mov %0, %%"REG_S" \n\t" | |
19396 | 2782 "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx] |
2783 "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1] | |
18861 | 2784 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2785 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2786 "shll $16, %%edi \n\t" | |
2787 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2788 "mov %1, %%"REG_D" \n\t" | |
2789 "shrl $9, %%esi \n\t" | |
2790 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t" | |
2791 | |
19396 | 2792 "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx] |
2793 "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1] | |
18861 | 2794 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2795 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2796 "shll $16, %%edi \n\t" | |
2797 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2798 "mov %1, %%"REG_D" \n\t" | |
2799 "shrl $9, %%esi \n\t" | |
2800 "movw %%si, 4096(%%"REG_D", %%"REG_a", 2)\n\t" | |
2801 | |
2802 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
19396 | 2803 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry |
18861 | 2804 "add $1, %%"REG_a" \n\t" |
2805 "cmp %2, %%"REG_a" \n\t" | |
2806 " jb 1b \n\t" | |
2807 | |
2808 /* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, | |
2809 which is needed to support GCC-4.0 */ | |
2810 #if defined(ARCH_X86_64) && ((__GNUC__ > 3) || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4)) | |
2811 :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | |
2812 #else | |
2813 :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), | |
2814 #endif | |
2815 "r" (src2) | |
19396 | 2816 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" |
18861 | 2817 ); |
2818 #ifdef HAVE_MMX2 | |
2819 } //if MMX2 can't be used | |
2820 #endif | |
2821 #else | |
2822 int i; | |
2823 unsigned int xpos=0; | |
2824 for(i=0;i<dstWidth;i++) | |
2825 { | |
2826 register unsigned int xx=xpos>>16; | |
2827 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2828 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |
2829 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |
2830 /* slower | |
2831 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; | |
2832 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; | |
2833 */ | |
2834 xpos+=xInc; | |
2835 } | |
2836 #endif | |
2837 } | |
2838 } | |
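/* Illustrative note, not part of the original file: hcscale scales U into
   dst[0..dstWidth-1] and V into dst[2048..], i.e. both chroma planes share one
   temp buffer 2048 int16_t entries apart -- which is why the asm above uses a
   4096 byte offset while the C fallback indexes i+2048. */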
2839 | |
2840 static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |
2841 int srcSliceH, uint8_t* dst[], int dstStride[]){ | |
2842 | |
2843 /* load a few things into local vars to make the code more readable and faster */ | |
2844 const int srcW= c->srcW; | |
2845 const int dstW= c->dstW; | |
2846 const int dstH= c->dstH; | |
2847 const int chrDstW= c->chrDstW; | |
2848 const int chrSrcW= c->chrSrcW; | |
2849 const int lumXInc= c->lumXInc; | |
2850 const int chrXInc= c->chrXInc; | |
2851 const int dstFormat= c->dstFormat; | |
2852 const int srcFormat= c->srcFormat; | |
2853 const int flags= c->flags; | |
2854 const int canMMX2BeUsed= c->canMMX2BeUsed; | |
2855 int16_t *vLumFilterPos= c->vLumFilterPos; | |
2856 int16_t *vChrFilterPos= c->vChrFilterPos; | |
2857 int16_t *hLumFilterPos= c->hLumFilterPos; | |
2858 int16_t *hChrFilterPos= c->hChrFilterPos; | |
2859 int16_t *vLumFilter= c->vLumFilter; | |
2860 int16_t *vChrFilter= c->vChrFilter; | |
2861 int16_t *hLumFilter= c->hLumFilter; | |
2862 int16_t *hChrFilter= c->hChrFilter; | |
2863 int32_t *lumMmxFilter= c->lumMmxFilter; | |
2864 int32_t *chrMmxFilter= c->chrMmxFilter; | |
2865 const int vLumFilterSize= c->vLumFilterSize; | |
2866 const int vChrFilterSize= c->vChrFilterSize; | |
2867 const int hLumFilterSize= c->hLumFilterSize; | |
2868 const int hChrFilterSize= c->hChrFilterSize; | |
2869 int16_t **lumPixBuf= c->lumPixBuf; | |
2870 int16_t **chrPixBuf= c->chrPixBuf; | |
2871 const int vLumBufSize= c->vLumBufSize; | |
2872 const int vChrBufSize= c->vChrBufSize; | |
2873 uint8_t *funnyYCode= c->funnyYCode; | |
2874 uint8_t *funnyUVCode= c->funnyUVCode; | |
2875 uint8_t *formatConvBuffer= c->formatConvBuffer; | |
2876 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; | |
2877 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); | |
2878 int lastDstY; | |
2879 | |
2880 /* vars which will change and which we need to store back in the context */ | |
2881 int dstY= c->dstY; | |
2882 int lumBufIndex= c->lumBufIndex; | |
2883 int chrBufIndex= c->chrBufIndex; | |
2884 int lastInLumBuf= c->lastInLumBuf; | |
2885 int lastInChrBuf= c->lastInChrBuf; | |
2886 | |
2887 if(isPacked(c->srcFormat)){ | |
2888 src[0]= | |
2889 src[1]= | |
2890 src[2]= src[0]; | |
2891 srcStride[0]= | |
2892 srcStride[1]= | |
2893 srcStride[2]= srcStride[0]; | |
2894 } | |
2895 srcStride[1]<<= c->vChrDrop; | |
2896 srcStride[2]<<= c->vChrDrop; | |
2897 | |
2898 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2], | |
2899 // (int)dst[0], (int)dst[1], (int)dst[2]); | |
2900 | |
2901 #if 0 //self test FIXME move to a vfilter or something | |
2902 { | |
2903 static volatile int i=0; | |
2904 i++; | |
2905 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH) | |
2906 selfTest(src, srcStride, c->srcW, c->srcH); | |
2907 i--; | |
2908 } | |
2909 #endif | |
2910 | |
2911 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], | |
2912 //dstStride[0],dstStride[1],dstStride[2]); | |
2913 | |
2914 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) | |
2915 { | |
2916 static int firstTime=1; //FIXME move this into the context perhaps | |
2917 if(flags & SWS_PRINT_INFO && firstTime) | |
2918 { | |
2919 MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n" | |
2920 "SwScaler: ->cannot do aligned memory acesses anymore\n"); | |
2921 firstTime=0; | |
2922 } | |
2923 } | |
2924 | |
2925 /* Note: the user might start scaling in the middle of the picture, so this will not get executed. | |
2926 This is not really intended, but it currently works, so people might rely on it. */ | |
2927 if(srcSliceY ==0){ | |
2928 lumBufIndex=0; | |
2929 chrBufIndex=0; | |
2930 dstY=0; | |
2931 lastInLumBuf= -1; | |
2932 lastInChrBuf= -1; | |
2933 } | |
2934 | |
2935 lastDstY= dstY; | |
2936 | |
2937 for(;dstY < dstH; dstY++){ | |
2938 unsigned char *dest =dst[0]+dstStride[0]*dstY; | |
2939 const int chrDstY= dstY>>c->chrDstVSubSample; | |
2940 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; | |
2941 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; | |
2942 | |
2943 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input | |
2944 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input | |
2945 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input | |
2946 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input | |
2947 | |
2948 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n", | |
2949 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample); | |
2950 //handle holes (FAST_BILINEAR & weird filters) | |
2951 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; | |
2952 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; | |
2953 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); | |
2954 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) | |
2955 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1) | |
2956 | |
2957 // Do we have enough lines in this slice to output the dstY line? | |
2958 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample)) | |
2959 { | |
2960 //Do horizontal scaling | |
2961 while(lastInLumBuf < lastLumSrcY) | |
2962 { | |
2963 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; | |
2964 lumBufIndex++; | |
2965 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY); | |
2966 ASSERT(lumBufIndex < 2*vLumBufSize) | |
2967 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2968 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
2969 // printf("%d %d\n", lumBufIndex, vLumBufSize); | |
2970 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, | |
2971 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, | |
2972 funnyYCode, c->srcFormat, formatConvBuffer, | |
2973 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
2974 lastInLumBuf++; | |
2975 } | |
2976 while(lastInChrBuf < lastChrSrcY) | |
2977 { | |
2978 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; | |
2979 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; | |
2980 chrBufIndex++; | |
2981 ASSERT(chrBufIndex < 2*vChrBufSize) | |
2982 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)) | |
2983 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) | |
2984 //FIXME replace parameters through context struct (some at least) | |
2985 | |
2986 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
2987 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, | |
2988 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, | |
2989 funnyUVCode, c->srcFormat, formatConvBuffer, | |
2990 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
2991 lastInChrBuf++; | |
2992 } | |
2993 //wrap buf index around to stay inside the ring buffer | |
2994 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2995 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2996 } | |
2997 else // not enough lines left in this slice -> load the rest in the buffer | |
2998 { | |
2999 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", | |
3000 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, | |
3001 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, | |
3002 vChrBufSize, vLumBufSize);*/ | |
3003 | |
3004 //Do horizontal scaling | |
3005 while(lastInLumBuf+1 < srcSliceY + srcSliceH) | |
3006 { | |
3007 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; | |
3008 lumBufIndex++; | |
3009 ASSERT(lumBufIndex < 2*vLumBufSize) | |
3010 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
3011 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
3012 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, | |
3013 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, | |
3014 funnyYCode, c->srcFormat, formatConvBuffer, | |
3015 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3016 lastInLumBuf++; | |
3017 } | |
3018 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) | |
3019 { | |
3020 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; | |
3021 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; | |
3022 chrBufIndex++; | |
3023 ASSERT(chrBufIndex < 2*vChrBufSize) | |
3024 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH) | |
3025 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) | |
3026 | |
3027 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
3028 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, | |
3029 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, | |
3030 funnyUVCode, c->srcFormat, formatConvBuffer, | |
3031 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3032 lastInChrBuf++; | |
3033 } | |
3034 //wrap buf index around to stay inside the ring buffer | |
3035 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
3036 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
3037 break; //we can't output a dstY line so let's try with the next slice | |
3038 } | |
3039 | |
3040 #ifdef HAVE_MMX | |
3041 b5Dither= dither8[dstY&1]; | |
3042 g6Dither= dither4[dstY&1]; | |
3043 g5Dither= dither8[dstY&1]; | |
3044 r5Dither= dither8[(dstY+1)&1]; | |
3045 #endif | |
3046 if(dstY < dstH-2) | |
3047 { | |
3048 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
3049 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
3050 #ifdef HAVE_MMX | |
3051 int i; | |
19172 | 3052 if(flags & SWS_ACCURATE_RND){ |
3053 for(i=0; i<vLumFilterSize; i+=2){ | |
3054 lumMmxFilter[2*i+0]= lumSrcPtr[i ]; | |
3055 lumMmxFilter[2*i+1]= lumSrcPtr[i+(vLumFilterSize>1)]; | |
3056 lumMmxFilter[2*i+2]= | |
3057 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ] | |
3058 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); | |
3059 } | |
3060 for(i=0; i<vChrFilterSize; i+=2){ | |
3061 chrMmxFilter[2*i+0]= chrSrcPtr[i ]; | |
3062 chrMmxFilter[2*i+1]= chrSrcPtr[i+(vChrFilterSize>1)]; | |
3063 chrMmxFilter[2*i+2]= | |
3064 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ] | |
3065 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); | |
3066 } | |
3067 }else{ | |
18861 | 3068 for(i=0; i<vLumFilterSize; i++) |
3069 { | |
3070 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; | |
3071 lumMmxFilter[4*i+2]= | |
3072 lumMmxFilter[4*i+3]= | |
3073 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; | |
3074 } | |
3075 for(i=0; i<vChrFilterSize; i++) | |
3076 { | |
3077 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; | |
3078 chrMmxFilter[4*i+2]= | |
3079 chrMmxFilter[4*i+3]= | |
3080 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; | |
3081 } | |
19172 | 3082 } |
18861 | 3083 #endif |
3084 if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){ | |
3085 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |
3086 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi | |
3087 RENAME(yuv2nv12X)(c, | |
3088 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |
3089 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
3090 dest, uDest, dstW, chrDstW, dstFormat); | |
3091 } | |
3092 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like | |
3093 { | |
3094 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |
3095 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
3096 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 | |
3097 { | |
3098 int16_t *lumBuf = lumPixBuf[0]; | |
3099 int16_t *chrBuf= chrPixBuf[0]; | |
3100 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW); | |
3101 } | |
3102 else //General YV12 | |
3103 { | |
3104 RENAME(yuv2yuvX)(c, | |
3105 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |
3106 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
3107 dest, uDest, vDest, dstW, chrDstW); | |
3108 } | |
3109 } | |
3110 else | |
3111 { | |
3112 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
3113 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
3114 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | |
3115 { | |
3116 int chrAlpha= vChrFilter[2*dstY+1]; | |
3117 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), | |
3118 dest, dstW, chrAlpha, dstFormat, flags, dstY); | |
3119 } | |
3120 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | |
3121 { | |
3122 int lumAlpha= vLumFilter[2*dstY+1]; | |
3123 int chrAlpha= vChrFilter[2*dstY+1]; | |
19172 | 3124 lumMmxFilter[2]= |
3125 lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; | |
3126 chrMmxFilter[2]= | |
3127 chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; | |
18861 | 3128 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
3129 dest, dstW, lumAlpha, chrAlpha, dstY); | |
3130 } | |
3131 else //General RGB | |
3132 { | |
3133 RENAME(yuv2packedX)(c, | |
3134 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |
3135 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
3136 dest, dstW, dstY); | |
3137 } | |
3138 } | |
3139 } | |
3140 else // hmm looks like we can't use MMX here without overwriting this array's tail | |
3141 { | |
3142 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
3143 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
3144 if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){ | |
3145 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |
3146 if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi | |
3147 yuv2nv12XinC( | |
3148 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |
3149 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
3150 dest, uDest, dstW, chrDstW, dstFormat); | |
3151 } | |
3152 else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 | |
3153 { | |
3154 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |
3155 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
3156 yuv2yuvXinC( | |
3157 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |
3158 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
3159 dest, uDest, vDest, dstW, chrDstW); | |
3160 } | |
3161 else | |
3162 { | |
3163 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
3164 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
3165 yuv2packedXinC(c, | |
3166 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |
3167 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
3168 dest, dstW, dstY); | |
3169 } | |
3170 } | |
3171 } | |
3172 | |
3173 #ifdef HAVE_MMX | |
3174 __asm __volatile(SFENCE:::"memory"); | |
3175 __asm __volatile(EMMS:::"memory"); | |
3176 #endif | |
3177 /* store changed local vars back in the context */ | |
3178 c->dstY= dstY; | |
3179 c->lumBufIndex= lumBufIndex; | |
3180 c->chrBufIndex= chrBufIndex; | |
3181 c->lastInLumBuf= lastInLumBuf; | |
3182 c->lastInChrBuf= lastInChrBuf; | |
3183 | |
3184 return dstY - lastDstY; | |
3185 } |
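/* Illustrative usage sketch, not part of the original file: swScale consumes
   the picture as one or more top-to-bottom slices and returns the number of
   output lines written; callers normally reach it through the public
   sws_scale() entry point. With whole frames the call amounts to this
   (c and the pointers/strides are hypothetical caller-owned values): */
#if 0
{
	uint8_t *src[3], *dst[3];           // plane pointers filled in by the caller
	int      srcStride[3], dstStride[3];
	int outLines= RENAME(swScale)(c, src, srcStride, 0, c->srcH, dst, dstStride);
}
#endif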