Mercurial > mplayer.hg
annotate libswscale/swscale_template.c @ 19640:521f71200591
Display track names in matroska files.
Patch by Paul Lebedev _paul d lebedev a gmail d com_.
author | eugeni |
---|---|
date | Sat, 02 Sep 2006 19:39:24 +0000 |
parents | 4678e9f81334 |
children | 8e50cba9fe03 |
rev | line source |
---|---|
18861 | 1 /* |
2 Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> | |
3 | |
4 This program is free software; you can redistribute it and/or modify | |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
19594
4678e9f81334
make the C code of the swscaler which i wrote LGPL
michael
parents:
19400
diff
changeset
|
17 |
4678e9f81334
make the C code of the swscaler which i wrote LGPL
michael
parents:
19400
diff
changeset
|
18 the C code (not assembly, mmx, ...) of the swscaler which has been written |
4678e9f81334
make the C code of the swscaler which i wrote LGPL
michael
parents:
19400
diff
changeset
|
19 by Michael Niedermayer can be used under the LGPL license too |
18861 | 20 */ |
21 | |
22 #undef REAL_MOVNTQ | |
23 #undef MOVNTQ | |
24 #undef PAVGB | |
25 #undef PREFETCH | |
26 #undef PREFETCHW | |
27 #undef EMMS | |
28 #undef SFENCE | |
29 | |
30 #ifdef HAVE_3DNOW | |
31 /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | |
32 #define EMMS "femms" | |
33 #else | |
34 #define EMMS "emms" | |
35 #endif | |
36 | |
37 #ifdef HAVE_3DNOW | |
38 #define PREFETCH "prefetch" | |
39 #define PREFETCHW "prefetchw" | |
40 #elif defined ( HAVE_MMX2 ) | |
41 #define PREFETCH "prefetchnta" | |
42 #define PREFETCHW "prefetcht0" | |
43 #else | |
44 #define PREFETCH "/nop" | |
45 #define PREFETCHW "/nop" | |
46 #endif | |
47 | |
48 #ifdef HAVE_MMX2 | |
49 #define SFENCE "sfence" | |
50 #else | |
51 #define SFENCE "/nop" | |
52 #endif | |
53 | |
54 #ifdef HAVE_MMX2 | |
55 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" | |
56 #elif defined (HAVE_3DNOW) | |
57 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" | |
58 #endif | |
59 | |
60 #ifdef HAVE_MMX2 | |
61 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" | |
62 #else | |
63 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" | |
64 #endif | |
65 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) | |
66 | |
67 #ifdef HAVE_ALTIVEC | |
68 #include "swscale_altivec_template.c" | |
69 #endif | |
70 | |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
71 #define YSCALEYUV2YV12X(x, offset, dest, width) \ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
72 asm volatile(\ |
18861 | 73 "xor %%"REG_a", %%"REG_a" \n\t"\ |
74 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ | |
75 "movq %%mm3, %%mm4 \n\t"\ | |
76 "lea " offset "(%0), %%"REG_d" \n\t"\ | |
77 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
78 ASMALIGN(4) /* FIXME Unroll? */\ |
18861 | 79 "1: \n\t"\ |
80 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | |
81 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ | |
82 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\ | |
83 "add $16, %%"REG_d" \n\t"\ | |
84 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
85 "test %%"REG_S", %%"REG_S" \n\t"\ | |
86 "pmulhw %%mm0, %%mm2 \n\t"\ | |
87 "pmulhw %%mm0, %%mm5 \n\t"\ | |
88 "paddw %%mm2, %%mm3 \n\t"\ | |
89 "paddw %%mm5, %%mm4 \n\t"\ | |
90 " jnz 1b \n\t"\ | |
91 "psraw $3, %%mm3 \n\t"\ | |
92 "psraw $3, %%mm4 \n\t"\ | |
93 "packuswb %%mm4, %%mm3 \n\t"\ | |
94 MOVNTQ(%%mm3, (%1, %%REGa))\ | |
95 "add $8, %%"REG_a" \n\t"\ | |
96 "cmp %2, %%"REG_a" \n\t"\ | |
97 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ | |
98 "movq %%mm3, %%mm4 \n\t"\ | |
99 "lea " offset "(%0), %%"REG_d" \n\t"\ | |
100 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
101 "jb 1b \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
102 :: "r" (&c->redDither),\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
103 "r" (dest), "p" (width)\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
104 : "%"REG_a, "%"REG_d, "%"REG_S\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
105 ); |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
106 |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
107 #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
108 asm volatile(\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
109 "lea " offset "(%0), %%"REG_d" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
110 "xor %%"REG_a", %%"REG_a" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
111 "pxor %%mm4, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
112 "pxor %%mm5, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
113 "pxor %%mm6, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
114 "pxor %%mm7, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
115 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
116 ASMALIGN(4) \ |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
117 "1: \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
118 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
119 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
120 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
121 "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
122 "movq %%mm0, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
123 "punpcklwd %%mm1, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
124 "punpckhwd %%mm1, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
125 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
126 "pmaddwd %%mm1, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
127 "pmaddwd %%mm1, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
128 "paddd %%mm0, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
129 "paddd %%mm3, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
130 "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
131 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
132 "add $16, %%"REG_d" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
133 "test %%"REG_S", %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
134 "movq %%mm2, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
135 "punpcklwd %%mm3, %%mm2 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
136 "punpckhwd %%mm3, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
137 "pmaddwd %%mm1, %%mm2 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
138 "pmaddwd %%mm1, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
139 "paddd %%mm2, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
140 "paddd %%mm0, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
141 " jnz 1b \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
142 "psrad $16, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
143 "psrad $16, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
144 "psrad $16, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
145 "psrad $16, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
146 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
147 "packssdw %%mm5, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
148 "packssdw %%mm7, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
149 "paddw %%mm0, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
150 "paddw %%mm0, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
151 "psraw $3, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
152 "psraw $3, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
153 "packuswb %%mm6, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
154 MOVNTQ(%%mm4, (%1, %%REGa))\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
155 "add $8, %%"REG_a" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
156 "cmp %2, %%"REG_a" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
157 "lea " offset "(%0), %%"REG_d" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
158 "pxor %%mm4, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
159 "pxor %%mm5, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
160 "pxor %%mm6, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
161 "pxor %%mm7, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
162 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
163 "jb 1b \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
164 :: "r" (&c->redDither),\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
165 "r" (dest), "p" (width)\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
166 : "%"REG_a, "%"REG_d, "%"REG_S\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
167 ); |
18861 | 168 |
169 #define YSCALEYUV2YV121 \ | |
170 "mov %2, %%"REG_a" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
171 ASMALIGN(4) /* FIXME Unroll? */\ |
18861 | 172 "1: \n\t"\ |
173 "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ | |
174 "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\ | |
175 "psraw $7, %%mm0 \n\t"\ | |
176 "psraw $7, %%mm1 \n\t"\ | |
177 "packuswb %%mm1, %%mm0 \n\t"\ | |
178 MOVNTQ(%%mm0, (%1, %%REGa))\ | |
179 "add $8, %%"REG_a" \n\t"\ | |
180 "jnc 1b \n\t" | |
181 | |
182 /* | |
183 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
184 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
185 "r" (dest), "m" (dstW), | |
186 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
187 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
188 */ | |
189 #define YSCALEYUV2PACKEDX \ | |
19173 | 190 asm volatile(\ |
18861 | 191 "xor %%"REG_a", %%"REG_a" \n\t"\ |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
192 ASMALIGN(4)\ |
18861 | 193 "nop \n\t"\ |
194 "1: \n\t"\ | |
195 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ | |
196 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
197 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ | |
198 "movq %%mm3, %%mm4 \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
199 ASMALIGN(4)\ |
18861 | 200 "2: \n\t"\ |
201 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | |
202 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ | |
203 "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ | |
204 "add $16, %%"REG_d" \n\t"\ | |
205 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
206 "pmulhw %%mm0, %%mm2 \n\t"\ | |
207 "pmulhw %%mm0, %%mm5 \n\t"\ | |
208 "paddw %%mm2, %%mm3 \n\t"\ | |
209 "paddw %%mm5, %%mm4 \n\t"\ | |
210 "test %%"REG_S", %%"REG_S" \n\t"\ | |
211 " jnz 2b \n\t"\ | |
212 \ | |
213 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ | |
214 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
215 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ | |
216 "movq %%mm1, %%mm7 \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
217 ASMALIGN(4)\ |
18861 | 218 "2: \n\t"\ |
219 "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ | |
220 "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\ | |
221 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\ | |
222 "add $16, %%"REG_d" \n\t"\ | |
223 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | |
224 "pmulhw %%mm0, %%mm2 \n\t"\ | |
225 "pmulhw %%mm0, %%mm5 \n\t"\ | |
226 "paddw %%mm2, %%mm1 \n\t"\ | |
227 "paddw %%mm5, %%mm7 \n\t"\ | |
228 "test %%"REG_S", %%"REG_S" \n\t"\ | |
229 " jnz 2b \n\t"\ | |
230 | |
19173 | 231 #define YSCALEYUV2PACKEDX_END\ |
232 :: "r" (&c->redDither), \ | |
233 "m" (dummy), "m" (dummy), "m" (dummy),\ | |
234 "r" (dest), "m" (dstW)\ | |
235 : "%"REG_a, "%"REG_d, "%"REG_S\ | |
236 ); | |
237 | |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
238 #define YSCALEYUV2PACKEDX_ACCURATE \ |
19173 | 239 asm volatile(\ |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
240 "xor %%"REG_a", %%"REG_a" \n\t"\ |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
241 ASMALIGN(4)\ |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
242 "nop \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
243 "1: \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
244 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
245 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
246 "pxor %%mm4, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
247 "pxor %%mm5, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
248 "pxor %%mm6, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
249 "pxor %%mm7, %%mm7 \n\t"\ |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
250 ASMALIGN(4)\ |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
251 "2: \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
252 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
253 "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
254 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
255 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
256 "movq %%mm0, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
257 "punpcklwd %%mm1, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
258 "punpckhwd %%mm1, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
259 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
260 "pmaddwd %%mm1, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
261 "pmaddwd %%mm1, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
262 "paddd %%mm0, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
263 "paddd %%mm3, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
264 "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
265 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
266 "add $16, %%"REG_d" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
267 "test %%"REG_S", %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
268 "movq %%mm2, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
269 "punpcklwd %%mm3, %%mm2 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
270 "punpckhwd %%mm3, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
271 "pmaddwd %%mm1, %%mm2 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
272 "pmaddwd %%mm1, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
273 "paddd %%mm2, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
274 "paddd %%mm0, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
275 " jnz 2b \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
276 "psrad $16, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
277 "psrad $16, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
278 "psrad $16, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
279 "psrad $16, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
280 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
281 "packssdw %%mm5, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
282 "packssdw %%mm7, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
283 "paddw %%mm0, %%mm4 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
284 "paddw %%mm0, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
285 "movq %%mm4, "U_TEMP"(%0) \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
286 "movq %%mm6, "V_TEMP"(%0) \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
287 \ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
288 "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
289 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
290 "pxor %%mm1, %%mm1 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
291 "pxor %%mm5, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
292 "pxor %%mm7, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
293 "pxor %%mm6, %%mm6 \n\t"\ |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
294 ASMALIGN(4)\ |
19172
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
295 "2: \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
296 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
297 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
298 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
299 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
300 "movq %%mm0, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
301 "punpcklwd %%mm4, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
302 "punpckhwd %%mm4, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
303 "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
304 "pmaddwd %%mm4, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
305 "pmaddwd %%mm4, %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
306 "paddd %%mm0, %%mm1 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
307 "paddd %%mm3, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
308 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
309 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
310 "add $16, %%"REG_d" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
311 "test %%"REG_S", %%"REG_S" \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
312 "movq %%mm2, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
313 "punpcklwd %%mm3, %%mm2 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
314 "punpckhwd %%mm3, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
315 "pmaddwd %%mm4, %%mm2 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
316 "pmaddwd %%mm4, %%mm0 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
317 "paddd %%mm2, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
318 "paddd %%mm0, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
319 " jnz 2b \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
320 "psrad $16, %%mm1 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
321 "psrad $16, %%mm5 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
322 "psrad $16, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
323 "psrad $16, %%mm6 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
324 "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
325 "packssdw %%mm5, %%mm1 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
326 "packssdw %%mm6, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
327 "paddw %%mm0, %%mm1 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
328 "paddw %%mm0, %%mm7 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
329 "movq "U_TEMP"(%0), %%mm3 \n\t"\ |
bae6c99a99cc
vertical scaler with accurate rounding, some people on doom9 can see +-1 errors
michael
parents:
18861
diff
changeset
|
330 "movq "V_TEMP"(%0), %%mm4 \n\t"\ |
18861 | 331 |
19173 | 332 #define YSCALEYUV2RGBX \ |
18861 | 333 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ |
334 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ | |
335 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | |
336 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
337 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ | |
338 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ | |
339 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | |
340 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ | |
341 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ | |
342 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ | |
343 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ | |
344 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ | |
345 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ | |
346 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | |
347 "paddw %%mm3, %%mm4 \n\t"\ | |
348 "movq %%mm2, %%mm0 \n\t"\ | |
349 "movq %%mm5, %%mm6 \n\t"\ | |
350 "movq %%mm4, %%mm3 \n\t"\ | |
351 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
352 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
353 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
354 "paddw %%mm1, %%mm2 \n\t"\ | |
355 "paddw %%mm1, %%mm5 \n\t"\ | |
356 "paddw %%mm1, %%mm4 \n\t"\ | |
357 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
358 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
359 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
360 "paddw %%mm7, %%mm0 \n\t"\ | |
361 "paddw %%mm7, %%mm6 \n\t"\ | |
362 "paddw %%mm7, %%mm3 \n\t"\ | |
363 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
364 "packuswb %%mm0, %%mm2 \n\t"\ | |
365 "packuswb %%mm6, %%mm5 \n\t"\ | |
366 "packuswb %%mm3, %%mm4 \n\t"\ | |
367 "pxor %%mm7, %%mm7 \n\t" | |
368 #if 0 | |
369 #define FULL_YSCALEYUV2RGB \ | |
370 "pxor %%mm7, %%mm7 \n\t"\ | |
371 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | |
372 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
373 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
374 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | |
375 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
376 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
377 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
378 ASMALIGN(4)\ |
18861 | 379 "1: \n\t"\ |
380 "movq (%0, %%"REG_a", 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
381 "movq (%1, %%"REG_a", 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
382 "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
383 "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
384 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
385 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
386 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
387 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
388 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
389 "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
390 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
391 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
392 "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ | |
393 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
394 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
395 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
396 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ | |
397 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
398 \ | |
399 \ | |
400 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
401 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | |
402 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\ | |
403 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
404 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\ | |
405 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
406 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ | |
407 \ | |
408 \ | |
409 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ | |
410 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\ | |
411 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
412 "paddw %%mm1, %%mm3 \n\t" /* B*/\ | |
413 "paddw %%mm1, %%mm0 \n\t" /* R*/\ | |
414 "packuswb %%mm3, %%mm3 \n\t"\ | |
415 \ | |
416 "packuswb %%mm0, %%mm0 \n\t"\ | |
417 "paddw %%mm4, %%mm2 \n\t"\ | |
418 "paddw %%mm2, %%mm1 \n\t" /* G*/\ | |
419 \ | |
420 "packuswb %%mm1, %%mm1 \n\t" | |
421 #endif | |
422 | |
423 #define REAL_YSCALEYUV2PACKED(index, c) \ | |
424 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\ | |
425 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\ | |
426 "psraw $3, %%mm0 \n\t"\ | |
427 "psraw $3, %%mm1 \n\t"\ | |
428 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ | |
429 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ | |
430 "xor "#index", "#index" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
431 ASMALIGN(4)\ |
18861 | 432 "1: \n\t"\ |
433 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
434 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
435 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
436 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
437 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
438 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
439 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\ | |
440 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
441 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
442 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
443 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
444 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
445 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
446 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
447 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
448 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ | |
449 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ | |
450 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
451 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
452 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
453 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
454 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
455 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
456 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
457 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
458 | |
459 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) | |
460 | |
461 #define REAL_YSCALEYUV2RGB(index, c) \ | |
462 "xor "#index", "#index" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
463 ASMALIGN(4)\ |
18861 | 464 "1: \n\t"\ |
465 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
466 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
467 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\ | |
468 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\ | |
469 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
470 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
471 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\ | |
472 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
473 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
474 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
475 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
476 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
477 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
478 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ | |
479 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ | |
480 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | |
481 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
482 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ | |
483 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ | |
484 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | |
485 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
486 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
487 "movq 8(%0, "#index", 2), %%mm6\n\t" /*buf0[eax]*/\ | |
488 "movq 8(%1, "#index", 2), %%mm7\n\t" /*buf1[eax]*/\ | |
489 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
490 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
491 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
492 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
493 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
494 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
495 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
496 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
497 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ | |
498 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ | |
499 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ | |
500 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ | |
501 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ | |
502 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ | |
503 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | |
504 "paddw %%mm3, %%mm4 \n\t"\ | |
505 "movq %%mm2, %%mm0 \n\t"\ | |
506 "movq %%mm5, %%mm6 \n\t"\ | |
507 "movq %%mm4, %%mm3 \n\t"\ | |
508 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
509 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
510 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
511 "paddw %%mm1, %%mm2 \n\t"\ | |
512 "paddw %%mm1, %%mm5 \n\t"\ | |
513 "paddw %%mm1, %%mm4 \n\t"\ | |
514 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
515 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
516 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
517 "paddw %%mm7, %%mm0 \n\t"\ | |
518 "paddw %%mm7, %%mm6 \n\t"\ | |
519 "paddw %%mm7, %%mm3 \n\t"\ | |
520 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
521 "packuswb %%mm0, %%mm2 \n\t"\ | |
522 "packuswb %%mm6, %%mm5 \n\t"\ | |
523 "packuswb %%mm3, %%mm4 \n\t"\ | |
524 "pxor %%mm7, %%mm7 \n\t" | |
525 #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) | |
526 | |
527 #define REAL_YSCALEYUV2PACKED1(index, c) \ | |
528 "xor "#index", "#index" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
529 ASMALIGN(4)\ |
18861 | 530 "1: \n\t"\ |
531 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |
532 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
533 "psraw $7, %%mm3 \n\t" \ | |
534 "psraw $7, %%mm4 \n\t" \ | |
535 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
536 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
537 "psraw $7, %%mm1 \n\t" \ | |
538 "psraw $7, %%mm7 \n\t" \ | |
539 | |
540 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) | |
541 | |
542 #define REAL_YSCALEYUV2RGB1(index, c) \ | |
543 "xor "#index", "#index" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
544 ASMALIGN(4)\ |
18861 | 545 "1: \n\t"\ |
546 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |
547 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
548 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
549 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
550 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ | |
551 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ | |
552 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | |
553 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
554 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ | |
555 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ | |
556 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | |
557 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
558 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
559 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
560 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
561 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ | |
562 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ | |
563 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ | |
564 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ | |
565 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ | |
566 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ | |
567 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | |
568 "paddw %%mm3, %%mm4 \n\t"\ | |
569 "movq %%mm2, %%mm0 \n\t"\ | |
570 "movq %%mm5, %%mm6 \n\t"\ | |
571 "movq %%mm4, %%mm3 \n\t"\ | |
572 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
573 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
574 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
575 "paddw %%mm1, %%mm2 \n\t"\ | |
576 "paddw %%mm1, %%mm5 \n\t"\ | |
577 "paddw %%mm1, %%mm4 \n\t"\ | |
578 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
579 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
580 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
581 "paddw %%mm7, %%mm0 \n\t"\ | |
582 "paddw %%mm7, %%mm6 \n\t"\ | |
583 "paddw %%mm7, %%mm3 \n\t"\ | |
584 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
585 "packuswb %%mm0, %%mm2 \n\t"\ | |
586 "packuswb %%mm6, %%mm5 \n\t"\ | |
587 "packuswb %%mm3, %%mm4 \n\t"\ | |
588 "pxor %%mm7, %%mm7 \n\t" | |
589 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) | |
590 | |
591 #define REAL_YSCALEYUV2PACKED1b(index, c) \ | |
592 "xor "#index", "#index" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
593 ASMALIGN(4)\ |
18861 | 594 "1: \n\t"\ |
595 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
596 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
597 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
598 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
599 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ | |
600 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
601 "psrlw $8, %%mm3 \n\t" \ | |
602 "psrlw $8, %%mm4 \n\t" \ | |
603 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
604 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
605 "psraw $7, %%mm1 \n\t" \ | |
606 "psraw $7, %%mm7 \n\t" | |
607 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) | |
608 | |
609 // do vertical chrominance interpolation | |
610 #define REAL_YSCALEYUV2RGB1b(index, c) \ | |
611 "xor "#index", "#index" \n\t"\ | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
612 ASMALIGN(4)\ |
18861 | 613 "1: \n\t"\ |
614 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
615 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
616 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
617 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
618 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ | |
619 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
620 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ | |
621 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ | |
622 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ | |
623 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ | |
624 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | |
625 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
626 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ | |
627 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ | |
628 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | |
629 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
630 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
631 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
632 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
633 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ | |
634 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ | |
635 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ | |
636 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ | |
637 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ | |
638 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ | |
639 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | |
640 "paddw %%mm3, %%mm4 \n\t"\ | |
641 "movq %%mm2, %%mm0 \n\t"\ | |
642 "movq %%mm5, %%mm6 \n\t"\ | |
643 "movq %%mm4, %%mm3 \n\t"\ | |
644 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
645 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
646 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
647 "paddw %%mm1, %%mm2 \n\t"\ | |
648 "paddw %%mm1, %%mm5 \n\t"\ | |
649 "paddw %%mm1, %%mm4 \n\t"\ | |
650 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
651 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
652 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
653 "paddw %%mm7, %%mm0 \n\t"\ | |
654 "paddw %%mm7, %%mm6 \n\t"\ | |
655 "paddw %%mm7, %%mm3 \n\t"\ | |
656 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
657 "packuswb %%mm0, %%mm2 \n\t"\ | |
658 "packuswb %%mm6, %%mm5 \n\t"\ | |
659 "packuswb %%mm3, %%mm4 \n\t"\ | |
660 "pxor %%mm7, %%mm7 \n\t" | |
661 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) | |
662 | |
663 #define REAL_WRITEBGR32(dst, dstw, index) \ | |
664 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
665 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
666 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
667 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
668 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
669 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
670 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
671 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
672 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
673 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
674 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
675 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
676 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
677 \ | |
678 MOVNTQ(%%mm0, (dst, index, 4))\ | |
679 MOVNTQ(%%mm2, 8(dst, index, 4))\ | |
680 MOVNTQ(%%mm1, 16(dst, index, 4))\ | |
681 MOVNTQ(%%mm3, 24(dst, index, 4))\ | |
682 \ | |
683 "add $8, "#index" \n\t"\ | |
684 "cmp "#dstw", "#index" \n\t"\ | |
685 " jb 1b \n\t" | |
686 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index) | |
687 | |
688 #define REAL_WRITEBGR16(dst, dstw, index) \ | |
689 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ | |
690 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ | |
691 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
692 "psrlq $3, %%mm2 \n\t"\ | |
693 \ | |
694 "movq %%mm2, %%mm1 \n\t"\ | |
695 "movq %%mm4, %%mm3 \n\t"\ | |
696 \ | |
697 "punpcklbw %%mm7, %%mm3 \n\t"\ | |
698 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
699 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
700 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
701 \ | |
702 "psllq $3, %%mm3 \n\t"\ | |
703 "psllq $3, %%mm4 \n\t"\ | |
704 \ | |
705 "por %%mm3, %%mm2 \n\t"\ | |
706 "por %%mm4, %%mm1 \n\t"\ | |
707 \ | |
708 MOVNTQ(%%mm2, (dst, index, 2))\ | |
709 MOVNTQ(%%mm1, 8(dst, index, 2))\ | |
710 \ | |
711 "add $8, "#index" \n\t"\ | |
712 "cmp "#dstw", "#index" \n\t"\ | |
713 " jb 1b \n\t" | |
714 #define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index) | |
715 | |
716 #define REAL_WRITEBGR15(dst, dstw, index) \ | |
717 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ | |
718 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ | |
719 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
720 "psrlq $3, %%mm2 \n\t"\ | |
721 "psrlq $1, %%mm5 \n\t"\ | |
722 \ | |
723 "movq %%mm2, %%mm1 \n\t"\ | |
724 "movq %%mm4, %%mm3 \n\t"\ | |
725 \ | |
726 "punpcklbw %%mm7, %%mm3 \n\t"\ | |
727 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
728 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
729 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
730 \ | |
731 "psllq $2, %%mm3 \n\t"\ | |
732 "psllq $2, %%mm4 \n\t"\ | |
733 \ | |
734 "por %%mm3, %%mm2 \n\t"\ | |
735 "por %%mm4, %%mm1 \n\t"\ | |
736 \ | |
737 MOVNTQ(%%mm2, (dst, index, 2))\ | |
738 MOVNTQ(%%mm1, 8(dst, index, 2))\ | |
739 \ | |
740 "add $8, "#index" \n\t"\ | |
741 "cmp "#dstw", "#index" \n\t"\ | |
742 " jb 1b \n\t" | |
743 #define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index) | |
744 | |
745 #define WRITEBGR24OLD(dst, dstw, index) \ | |
746 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
747 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
748 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
749 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
750 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
751 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
752 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
753 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
754 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
755 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
756 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
757 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
758 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
759 \ | |
760 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
761 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ | |
762 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\ | |
763 "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\ | |
764 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ | |
765 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ | |
766 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ | |
767 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ | |
768 \ | |
769 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
770 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ | |
771 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ | |
772 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ | |
773 "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\ | |
774 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ | |
775 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ | |
776 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\ | |
777 "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\ | |
778 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ | |
779 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ | |
780 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ | |
781 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ | |
782 \ | |
783 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ | |
784 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ | |
785 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ | |
786 "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\ | |
787 "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\ | |
788 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ | |
789 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ | |
790 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ | |
791 \ | |
792 MOVNTQ(%%mm0, (dst))\ | |
793 MOVNTQ(%%mm2, 8(dst))\ | |
794 MOVNTQ(%%mm3, 16(dst))\ | |
795 "add $24, "#dst" \n\t"\ | |
796 \ | |
797 "add $8, "#index" \n\t"\ | |
798 "cmp "#dstw", "#index" \n\t"\ | |
799 " jb 1b \n\t" | |
800 | |
801 #define WRITEBGR24MMX(dst, dstw, index) \ | |
802 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
803 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
804 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
805 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
806 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
807 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
808 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
809 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
810 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
811 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
812 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
813 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
814 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
815 \ | |
816 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
817 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ | |
818 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ | |
819 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ | |
820 \ | |
821 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ | |
822 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ | |
823 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ | |
824 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ | |
825 \ | |
826 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ | |
827 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ | |
828 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ | |
829 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ | |
830 \ | |
831 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ | |
832 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ | |
833 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ | |
834 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ | |
835 MOVNTQ(%%mm0, (dst))\ | |
836 \ | |
837 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ | |
838 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ | |
839 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ | |
840 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ | |
841 MOVNTQ(%%mm6, 8(dst))\ | |
842 \ | |
843 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ | |
844 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ | |
845 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ | |
846 MOVNTQ(%%mm5, 16(dst))\ | |
847 \ | |
848 "add $24, "#dst" \n\t"\ | |
849 \ | |
850 "add $8, "#index" \n\t"\ | |
851 "cmp "#dstw", "#index" \n\t"\ | |
852 " jb 1b \n\t" | |
853 | |
854 #define WRITEBGR24MMX2(dst, dstw, index) \ | |
855 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
856 "movq "MANGLE(M24A)", %%mm0 \n\t"\ | |
857 "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |
858 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ | |
859 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | |
860 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | |
861 \ | |
862 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ | |
863 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ | |
864 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ | |
865 \ | |
866 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ | |
867 "por %%mm1, %%mm6 \n\t"\ | |
868 "por %%mm3, %%mm6 \n\t"\ | |
869 MOVNTQ(%%mm6, (dst))\ | |
870 \ | |
871 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ | |
872 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ | |
873 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | |
874 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | |
875 \ | |
876 "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ | |
877 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ | |
878 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | |
879 \ | |
880 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ | |
881 "por %%mm3, %%mm6 \n\t"\ | |
882 MOVNTQ(%%mm6, 8(dst))\ | |
883 \ | |
884 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ | |
885 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ | |
886 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ | |
887 \ | |
888 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | |
889 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | |
890 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ | |
891 \ | |
892 "por %%mm1, %%mm3 \n\t"\ | |
893 "por %%mm3, %%mm6 \n\t"\ | |
894 MOVNTQ(%%mm6, 16(dst))\ | |
895 \ | |
896 "add $24, "#dst" \n\t"\ | |
897 \ | |
898 "add $8, "#index" \n\t"\ | |
899 "cmp "#dstw", "#index" \n\t"\ | |
900 " jb 1b \n\t" | |
901 | |
902 #ifdef HAVE_MMX2 | |
903 #undef WRITEBGR24 | |
904 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) | |
905 #else | |
906 #undef WRITEBGR24 | |
907 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) | |
908 #endif | |
909 | |
910 #define REAL_WRITEYUY2(dst, dstw, index) \ | |
911 "packuswb %%mm3, %%mm3 \n\t"\ | |
912 "packuswb %%mm4, %%mm4 \n\t"\ | |
913 "packuswb %%mm7, %%mm1 \n\t"\ | |
914 "punpcklbw %%mm4, %%mm3 \n\t"\ | |
915 "movq %%mm1, %%mm7 \n\t"\ | |
916 "punpcklbw %%mm3, %%mm1 \n\t"\ | |
917 "punpckhbw %%mm3, %%mm7 \n\t"\ | |
918 \ | |
919 MOVNTQ(%%mm1, (dst, index, 2))\ | |
920 MOVNTQ(%%mm7, 8(dst, index, 2))\ | |
921 \ | |
922 "add $8, "#index" \n\t"\ | |
923 "cmp "#dstw", "#index" \n\t"\ | |
924 " jb 1b \n\t" | |
925 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) | |
926 | |
927 | |
/**
 * Vertical pass for planar YV12 output: applies the luma and chroma FIR
 * filters (lumFilter/chrFilter over lumFilterSize/chrFilterSize source rows)
 * and writes one output line each for Y, U and V.
 *
 * On MMX builds the work is done by the YSCALEYUV2YV12X* inline-asm macros
 * (defined earlier in this template); the _ACCURATE variants are selected by
 * the SWS_ACCURATE_RND flag. Otherwise it falls back to AltiVec or plain C.
 *
 * uDest may be NULL, in which case the chroma planes are skipped (e.g. for
 * gray output); note the vDest write is guarded by the uDest check only.
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
{
#ifdef HAVE_MMX
	if(c->flags & SWS_ACCURATE_RND){
		/* Bit-exact rounding variants (slower). The first macro argument
		   is an offset into the chroma source (4096 selects the V
		   samples; 0 the U samples — see the macro definition). */
		if(uDest){
			YSCALEYUV2YV12X_ACCURATE(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
			YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
		}

		YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
	}else{
		/* Default (faster, +-1 rounding differences possible). */
		if(uDest){
			YSCALEYUV2YV12X(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
			YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
		}

		YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
	}
#else
#ifdef HAVE_ALTIVEC
	yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
	                      chrFilter, chrSrc, chrFilterSize,
	                      dest, uDest, vDest, dstW, chrDstW);
#else //HAVE_ALTIVEC
	/* Generic C implementation (defined outside this template). */
	yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
	            chrFilter, chrSrc, chrFilterSize,
	            dest, uDest, vDest, dstW, chrDstW);
#endif //!HAVE_ALTIVEC
#endif
}
960 | |
/**
 * Vertical pass for semi-planar NV12/NV21 output (dstFormat selects the
 * U/V interleaving order). No SIMD version exists for this path, so it
 * always delegates to the plain C implementation.
 *
 * The SwsContext parameter c is unused here; it is kept so the signature
 * parallels RENAME(yuv2yuvX).
 */
static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
	yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
	             chrFilter, chrSrc, chrFilterSize,
	             dest, uDest, dstW, chrDstW, dstFormat);
}
969 | |
/**
 * 1:1 vertical pass (no filtering): converts one line of 16-bit intermediate
 * samples back to 8 bits with clipping, for planar YV12 output.
 *
 * The chroma source holds U at chrSrc[0..] and V at chrSrc[2048..] (fixed
 * 2048-sample plane stride, as used throughout this file). uDest may be
 * NULL to skip chroma output; vDest is only written when uDest is non-NULL.
 */
static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
{
#ifdef HAVE_MMX
	/* YSCALEYUV2YV121 processes from the end of the buffer with a negative
	   index, hence the "+ width" pointers and "-width" count operand. */
	if(uDest != NULL)
	{
		asm volatile(
				YSCALEYUV2YV121
				:: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
				"g" (-chrDstW)
				: "%"REG_a
			);

		asm volatile(
				YSCALEYUV2YV121
				:: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
				"g" (-chrDstW)
				: "%"REG_a
			);
	}

	asm volatile(
		YSCALEYUV2YV121
		:: "r" (lumSrc + dstW), "r" (dest + dstW),
		"g" (-dstW)
		: "%"REG_a
	);
#else
	int i;
	for(i=0; i<dstW; i++)
	{
		/* 9.7 fixed point -> 8 bit; >>7 of an int16_t yields [-256,255],
		   so bit 8 being set means the value is out of [0,255] on one
		   side or the other. */
		int val= lumSrc[i]>>7;

		if(val&256){
			if(val<0) val=0;
			else val=255;
		}

		dest[i]= val;
	}

	if(uDest != NULL)
		for(i=0; i<chrDstW; i++)
		{
			int u=chrSrc[i]>>7;
			int v=chrSrc[i + 2048]>>7;

			/* Clip both only when at least one is out of range. */
			if((u|v)&256){
				if(u<0) u=0;
				else if (u>255) u=255;
				if(v<0) v=0;
				else if (v>255) v=255;
			}

			uDest[i]= u;
			vDest[i]= v;
		}
#endif
}
1029 | |
1030 | |
/**
 * vertical scale YV12 to RGB
 *
 * Applies the vertical FIR filters and emits one line of a packed output
 * format (BGR32/BGR24/BGR15/BGR16/YUY2). On MMX builds each case is an
 * inline-asm block assembled from the YSCALEYUV2PACKEDX* macros; the
 * _ACCURATE variants are selected by SWS_ACCURATE_RND. Unhandled formats
 * fall through to the AltiVec or C implementations below.
 */
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				       uint8_t *dest, long dstW, long dstY)
{
	long dummy=0;
#ifdef HAVE_MMX
	if(c->flags & SWS_ACCURATE_RND){
		/* Bit-exact rounding asm variants. */
		switch(c->dstFormat){
		case IMGFMT_BGR32:
			YSCALEYUV2PACKEDX_ACCURATE
			YSCALEYUV2RGBX
			WRITEBGR32(%4, %5, %%REGa)

			YSCALEYUV2PACKEDX_END
			return;
		case IMGFMT_BGR24:
			YSCALEYUV2PACKEDX_ACCURATE
			YSCALEYUV2RGBX
			/* WRITEBGR24 needs an extra pointer register, so the
			   operand/clobber list is written out manually here
			   (with REG_c added) instead of using
			   YSCALEYUV2PACKEDX_END as the other cases do. */
			"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
			"add %4, %%"REG_c"                      \n\t"
			WRITEBGR24(%%REGc, %5, %%REGa)


			:: "r" (&c->redDither),
			"m" (dummy), "m" (dummy), "m" (dummy),
			"r" (dest), "m" (dstW)
			: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
			);
			return;
		case IMGFMT_BGR15:
			YSCALEYUV2PACKEDX_ACCURATE
			YSCALEYUV2RGBX
			/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
			"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
			"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
			"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

			WRITEBGR15(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		case IMGFMT_BGR16:
			YSCALEYUV2PACKEDX_ACCURATE
			YSCALEYUV2RGBX
			/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0;
			   green uses the 6-bit dither table here. */
#ifdef DITHER1XBPP
			"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
			"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
			"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

			WRITEBGR16(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		case IMGFMT_YUY2:
			/* No RGB conversion: drop precision and repack as YUY2. */
			YSCALEYUV2PACKEDX_ACCURATE
			/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */

			"psraw $3, %%mm3    \n\t"
			"psraw $3, %%mm4    \n\t"
			"psraw $3, %%mm1    \n\t"
			"psraw $3, %%mm7    \n\t"
			WRITEYUY2(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		}
	}else{
		/* Default (faster) asm variants — same structure as above but
		   built on YSCALEYUV2PACKEDX instead of the _ACCURATE macro. */
		switch(c->dstFormat)
		{
		case IMGFMT_BGR32:
			YSCALEYUV2PACKEDX
			YSCALEYUV2RGBX
			WRITEBGR32(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		case IMGFMT_BGR24:
			YSCALEYUV2PACKEDX
			YSCALEYUV2RGBX
			/* Manual epilogue; see the accurate BGR24 case above. */
			"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
			"add %4, %%"REG_c"                      \n\t"
			WRITEBGR24(%%REGc, %5, %%REGa)

			:: "r" (&c->redDither),
			"m" (dummy), "m" (dummy), "m" (dummy),
			"r" (dest), "m" (dstW)
			: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
			);
			return;
		case IMGFMT_BGR15:
			YSCALEYUV2PACKEDX
			YSCALEYUV2RGBX
			/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
			"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
			"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
			"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

			WRITEBGR15(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		case IMGFMT_BGR16:
			YSCALEYUV2PACKEDX
			YSCALEYUV2RGBX
			/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
			"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
			"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
			"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

			WRITEBGR16(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		case IMGFMT_YUY2:
			YSCALEYUV2PACKEDX
			/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */

			"psraw $3, %%mm3    \n\t"
			"psraw $3, %%mm4    \n\t"
			"psraw $3, %%mm1    \n\t"
			"psraw $3, %%mm7    \n\t"
			WRITEYUY2(%4, %5, %%REGa)
			YSCALEYUV2PACKEDX_END
			return;
		}
	}
#endif
#ifdef HAVE_ALTIVEC
	/* The following list of supported dstFormat values should
	   match what's found in the body of altivec_yuv2packedX() */
	if(c->dstFormat==IMGFMT_ABGR  || c->dstFormat==IMGFMT_BGRA  ||
	   c->dstFormat==IMGFMT_BGR24 || c->dstFormat==IMGFMT_RGB24 ||
	   c->dstFormat==IMGFMT_RGBA  || c->dstFormat==IMGFMT_ARGB)
		altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
		                     chrFilter, chrSrc, chrFilterSize,
		                     dest, dstW, dstY);
	else
#endif
		/* Generic C fallback for every remaining format. */
		yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
		               chrFilter, chrSrc, chrFilterSize,
		               dest, dstW, dstY);
}
1178 | |
1179 /** | |
1180 * vertical bilinear scale YV12 to RGB | |
1181 */ | |
1182 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, | |
1183 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) | |
1184 { | |
1185 int yalpha1=yalpha^4095; | |
1186 int uvalpha1=uvalpha^4095; | |
1187 int i; | |
1188 | |
1189 #if 0 //isn't used | |
1190 if(flags&SWS_FULL_CHR_H_INT) | |
1191 { | |
1192 switch(dstFormat) | |
1193 { | |
1194 #ifdef HAVE_MMX | |
1195 case IMGFMT_BGR32: | |
1196 asm volatile( | |
1197 | |
1198 | |
1199 FULL_YSCALEYUV2RGB | |
1200 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG | |
1201 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 | |
1202 | |
1203 "movq %%mm3, %%mm1 \n\t" | |
1204 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 | |
1205 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 | |
1206 | |
1207 MOVNTQ(%%mm3, (%4, %%REGa, 4)) | |
1208 MOVNTQ(%%mm1, 8(%4, %%REGa, 4)) | |
1209 | |
1210 "add $4, %%"REG_a" \n\t" | |
1211 "cmp %5, %%"REG_a" \n\t" | |
1212 " jb 1b \n\t" | |
1213 | |
1214 | |
1215 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW), | |
1216 "m" (yalpha1), "m" (uvalpha1) | |
1217 : "%"REG_a | |
1218 ); | |
1219 break; | |
1220 case IMGFMT_BGR24: | |
1221 asm volatile( | |
1222 | |
1223 FULL_YSCALEYUV2RGB | |
1224 | |
1225 // lsb ... msb | |
1226 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG | |
1227 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 | |
1228 | |
1229 "movq %%mm3, %%mm1 \n\t" | |
1230 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 | |
1231 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 | |
1232 | |
1233 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 | |
1234 "psrlq $8, %%mm3 \n\t" // GR0BGR00 | |
1235 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000 | |
1236 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00 | |
1237 "por %%mm2, %%mm3 \n\t" // BGRBGR00 | |
1238 "movq %%mm1, %%mm2 \n\t" | |
1239 "psllq $48, %%mm1 \n\t" // 000000BG | |
1240 "por %%mm1, %%mm3 \n\t" // BGRBGRBG | |
1241 | |
1242 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 | |
1243 "psrld $16, %%mm2 \n\t" // R000R000 | |
1244 "psrlq $24, %%mm1 \n\t" // 0BGR0000 | |
1245 "por %%mm2, %%mm1 \n\t" // RBGRR000 | |
1246 | |
1247 "mov %4, %%"REG_b" \n\t" | |
1248 "add %%"REG_a", %%"REG_b" \n\t" | |
1249 | |
1250 #ifdef HAVE_MMX2 | |
1251 //FIXME Alignment | |
1252 "movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t" | |
1253 "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t" | |
1254 #else | |
1255 "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" | |
1256 "psrlq $32, %%mm3 \n\t" | |
1257 "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t" | |
1258 "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" | |
1259 #endif | |
1260 "add $4, %%"REG_a" \n\t" | |
1261 "cmp %5, %%"REG_a" \n\t" | |
1262 " jb 1b \n\t" | |
1263 | |
1264 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), | |
1265 "m" (yalpha1), "m" (uvalpha1) | |
1266 : "%"REG_a, "%"REG_b | |
1267 ); | |
1268 break; | |
1269 case IMGFMT_BGR15: | |
1270 asm volatile( | |
1271 | |
1272 FULL_YSCALEYUV2RGB | |
1273 #ifdef DITHER1XBPP | |
1274 "paddusb "MANGLE(g5Dither)", %%mm1\n\t" | |
1275 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1276 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
1277 #endif | |
1278 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G | |
1279 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B | |
1280 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R | |
1281 | |
1282 "psrlw $3, %%mm3 \n\t" | |
1283 "psllw $2, %%mm1 \n\t" | |
1284 "psllw $7, %%mm0 \n\t" | |
1285 "pand "MANGLE(g15Mask)", %%mm1 \n\t" | |
1286 "pand "MANGLE(r15Mask)", %%mm0 \n\t" | |
1287 | |
1288 "por %%mm3, %%mm1 \n\t" | |
1289 "por %%mm1, %%mm0 \n\t" | |
1290 | |
1291 MOVNTQ(%%mm0, (%4, %%REGa, 2)) | |
1292 | |
1293 "add $4, %%"REG_a" \n\t" | |
1294 "cmp %5, %%"REG_a" \n\t" | |
1295 " jb 1b \n\t" | |
1296 | |
1297 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1298 "m" (yalpha1), "m" (uvalpha1) | |
1299 : "%"REG_a | |
1300 ); | |
1301 break; | |
1302 case IMGFMT_BGR16: | |
1303 asm volatile( | |
1304 | |
1305 FULL_YSCALEYUV2RGB | |
1306 #ifdef DITHER1XBPP | |
1307 "paddusb "MANGLE(g6Dither)", %%mm1\n\t" | |
1308 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1309 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
1310 #endif | |
1311 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G | |
1312 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B | |
1313 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R | |
1314 | |
1315 "psrlw $3, %%mm3 \n\t" | |
1316 "psllw $3, %%mm1 \n\t" | |
1317 "psllw $8, %%mm0 \n\t" | |
1318 "pand "MANGLE(g16Mask)", %%mm1 \n\t" | |
1319 "pand "MANGLE(r16Mask)", %%mm0 \n\t" | |
1320 | |
1321 "por %%mm3, %%mm1 \n\t" | |
1322 "por %%mm1, %%mm0 \n\t" | |
1323 | |
1324 MOVNTQ(%%mm0, (%4, %%REGa, 2)) | |
1325 | |
1326 "add $4, %%"REG_a" \n\t" | |
1327 "cmp %5, %%"REG_a" \n\t" | |
1328 " jb 1b \n\t" | |
1329 | |
1330 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1331 "m" (yalpha1), "m" (uvalpha1) | |
1332 : "%"REG_a | |
1333 ); | |
1334 break; | |
1335 #endif | |
1336 case IMGFMT_RGB32: | |
1337 #ifndef HAVE_MMX | |
1338 case IMGFMT_BGR32: | |
1339 #endif | |
1340 if(dstFormat==IMGFMT_BGR32) | |
1341 { | |
1342 int i; | |
1343 #ifdef WORDS_BIGENDIAN | |
1344 dest++; | |
1345 #endif | |
1346 for(i=0;i<dstW;i++){ | |
1347 // vertical linear interpolation && yuv2rgb in a single step: | |
1348 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1349 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1350 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1351 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; | |
1352 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1353 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
1354 dest+= 4; | |
1355 } | |
1356 } | |
1357 else if(dstFormat==IMGFMT_BGR24) | |
1358 { | |
1359 int i; | |
1360 for(i=0;i<dstW;i++){ | |
1361 // vertical linear interpolation && yuv2rgb in a single step: | |
1362 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1363 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1364 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1365 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; | |
1366 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1367 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
1368 dest+= 3; | |
1369 } | |
1370 } | |
1371 else if(dstFormat==IMGFMT_BGR16) | |
1372 { | |
1373 int i; | |
1374 for(i=0;i<dstW;i++){ | |
1375 // vertical linear interpolation && yuv2rgb in a single step: | |
1376 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1377 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1378 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1379 | |
1380 ((uint16_t*)dest)[i] = | |
1381 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | | |
1382 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1383 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
1384 } | |
1385 } | |
1386 else if(dstFormat==IMGFMT_BGR15) | |
1387 { | |
1388 int i; | |
1389 for(i=0;i<dstW;i++){ | |
1390 // vertical linear interpolation && yuv2rgb in a single step: | |
1391 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1392 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | |
1393 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | |
1394 | |
1395 ((uint16_t*)dest)[i] = | |
1396 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | | |
1397 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1398 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
1399 } | |
1400 } | |
1401 }//FULL_UV_IPOL | |
1402 else | |
1403 { | |
1404 #endif // if 0 | |
1405 #ifdef HAVE_MMX | |
1406 switch(c->dstFormat) | |
1407 { | |
1408 //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( | |
1409 case IMGFMT_BGR32: | |
1410 asm volatile( | |
1411 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1412 "mov %4, %%"REG_b" \n\t" | |
1413 "push %%"REG_BP" \n\t" | |
1414 YSCALEYUV2RGB(%%REGBP, %5) | |
1415 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | |
1416 "pop %%"REG_BP" \n\t" | |
1417 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1418 | |
1419 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1420 "a" (&c->redDither) | |
1421 ); | |
1422 return; | |
1423 case IMGFMT_BGR24: | |
1424 asm volatile( | |
1425 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1426 "mov %4, %%"REG_b" \n\t" | |
1427 "push %%"REG_BP" \n\t" | |
1428 YSCALEYUV2RGB(%%REGBP, %5) | |
1429 WRITEBGR24(%%REGb, 8280(%5), %%REGBP) | |
1430 "pop %%"REG_BP" \n\t" | |
1431 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1432 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1433 "a" (&c->redDither) | |
1434 ); | |
1435 return; | |
1436 case IMGFMT_BGR15: | |
1437 asm volatile( | |
1438 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1439 "mov %4, %%"REG_b" \n\t" | |
1440 "push %%"REG_BP" \n\t" | |
1441 YSCALEYUV2RGB(%%REGBP, %5) | |
1442 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1443 #ifdef DITHER1XBPP | |
1444 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1445 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1446 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1447 #endif | |
1448 | |
1449 WRITEBGR15(%%REGb, 8280(%5), %%REGBP) | |
1450 "pop %%"REG_BP" \n\t" | |
1451 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1452 | |
1453 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1454 "a" (&c->redDither) | |
1455 ); | |
1456 return; | |
1457 case IMGFMT_BGR16: | |
1458 asm volatile( | |
1459 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1460 "mov %4, %%"REG_b" \n\t" | |
1461 "push %%"REG_BP" \n\t" | |
1462 YSCALEYUV2RGB(%%REGBP, %5) | |
1463 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
1464 #ifdef DITHER1XBPP | |
1465 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | |
1466 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1467 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1468 #endif | |
1469 | |
1470 WRITEBGR16(%%REGb, 8280(%5), %%REGBP) | |
1471 "pop %%"REG_BP" \n\t" | |
1472 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1473 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1474 "a" (&c->redDither) | |
1475 ); | |
1476 return; | |
1477 case IMGFMT_YUY2: | |
1478 asm volatile( | |
1479 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | |
1480 "mov %4, %%"REG_b" \n\t" | |
1481 "push %%"REG_BP" \n\t" | |
1482 YSCALEYUV2PACKED(%%REGBP, %5) | |
1483 WRITEYUY2(%%REGb, 8280(%5), %%REGBP) | |
1484 "pop %%"REG_BP" \n\t" | |
1485 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | |
1486 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | |
1487 "a" (&c->redDither) | |
1488 ); | |
1489 return; | |
1490 default: break; | |
1491 } | |
1492 #endif //HAVE_MMX | |
1493 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) | |
1494 } | |
1495 | |
/**
 * YV12 to RGB without scaling or interpolating
 *
 * Converts one line using only buf0 for luma. For chroma, uvalpha < 2048
 * selects the "1" asm/C variants (uvbuf0 only, see note below) and otherwise
 * the "1b" variants (which also read uvbuf1). SWS_FULL_CHR_H_INT input is
 * redirected to the bilinear routine.
 */
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
				       uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
	const int yalpha1=0;
	int i;                          /* used by the C macro at the bottom */

	uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
	const int yalpha= 4096; //FIXME ...

	if(flags&SWS_FULL_CHR_H_INT)
	{
		RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
		return;
	}

#ifdef HAVE_MMX
	/* Register setup in every asm block below is identical to
	   yuv2packed2: spill REG_b into the context, push REG_BP, and pass
	   buf0/buf1/uvbuf0/uvbuf1 in c/d/S/D with eax = &c->redDither
	   (8280(%5) == DSTW_OFFSET relative to that anchor). */
	if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster
	{
		switch(dstFormat)
		{
		case IMGFMT_BGR32:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1(%%REGBP, %5)
				WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_BGR24:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1(%%REGBP, %5)
				WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_BGR15:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1(%%REGBP, %5)
				/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
				WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_BGR16:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1(%%REGBP, %5)
				/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

				WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_YUY2:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2PACKED1(%%REGBP, %5)
				WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		}
	}
	else
	{
		/* uvalpha >= 2048: average the two chroma lines ("1b" variants). */
		switch(dstFormat)
		{
		case IMGFMT_BGR32:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1b(%%REGBP, %5)
				WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_BGR24:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1b(%%REGBP, %5)
				WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_BGR15:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1b(%%REGBP, %5)
				/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
				WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_BGR16:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2RGB1b(%%REGBP, %5)
				/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

				WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		case IMGFMT_YUY2:
			asm volatile(
				"mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
				"mov %4, %%"REG_b"                      \n\t"
				"push %%"REG_BP"                        \n\t"
				YSCALEYUV2PACKED1b(%%REGBP, %5)
				WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
				"pop %%"REG_BP"                         \n\t"
				"mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"

				:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
				"a" (&c->redDither)
				);
			return;
		}
	}
#endif
	/* C fallback; variant selection mirrors the MMX paths above. */
	if( uvalpha < 2048 )
	{
		YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
	}else{
		YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
	}
}
1701 | |
//FIXME the yuy2* functions can read up to 7 samples too much
1703 | |
/**
 * Extracts the luma bytes (even byte positions, per the C fallback
 * dst[i] = src[2*i]) from a packed YUY2 line into a planar buffer.
 * The MMX path masks with bm01010101 and packs two quadwords per iteration,
 * walking a negative index up to 0 ("js 1b"), 8 output bytes at a time.
 */
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
{
#ifdef HAVE_MMX
	asm volatile(
		"movq "MANGLE(bm01010101)", %%mm2\n\t"
		"mov %0, %%"REG_a"              \n\t"
		"1:                             \n\t"
		"movq (%1, %%"REG_a",2), %%mm0  \n\t"
		"movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
		"pand %%mm2, %%mm0              \n\t"
		"pand %%mm2, %%mm1              \n\t"
		"packuswb %%mm1, %%mm0          \n\t"
		"movq %%mm0, (%2, %%"REG_a")    \n\t"
		"add $8, %%"REG_a"              \n\t"
		" js 1b                         \n\t"
		: : "g" (-width), "r" (src+width*2), "r" (dst+width)
		: "%"REG_a
	);
#else
	int i;
	for(i=0; i<width; i++)
		dst[i]= src[2*i];
#endif
}
1728 | |
/**
 * Extracts and vertically averages the chroma of two packed YUY2 lines into
 * planar U and V buffers: dstU[i] = avg(src1[4i+1], src2[4i+1]),
 * dstV[i] = avg(src1[4i+3], src2[4i+3]) (see the C fallback).
 * The SIMD path needs PAVGB, hence MMX2/3DNow only; it deinterleaves with
 * psrlw/pand + packuswb and stores 4 U and 4 V bytes per iteration.
 */
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"movq "MANGLE(bm01010101)", %%mm4\n\t"
		"mov %0, %%"REG_a"              \n\t"
		"1:                             \n\t"
		"movq (%1, %%"REG_a",4), %%mm0  \n\t"
		"movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
		"movq (%2, %%"REG_a",4), %%mm2  \n\t"
		"movq 8(%2, %%"REG_a",4), %%mm3 \n\t"
		PAVGB(%%mm2, %%mm0)
		PAVGB(%%mm3, %%mm1)
		"psrlw $8, %%mm0                \n\t"
		"psrlw $8, %%mm1                \n\t"
		"packuswb %%mm1, %%mm0          \n\t"
		"movq %%mm0, %%mm1              \n\t"
		"psrlw $8, %%mm0                \n\t"
		"pand %%mm4, %%mm1              \n\t"
		"packuswb %%mm0, %%mm0          \n\t"
		"packuswb %%mm1, %%mm1          \n\t"
		"movd %%mm0, (%4, %%"REG_a")    \n\t"
		"movd %%mm1, (%3, %%"REG_a")    \n\t"
		"add $4, %%"REG_a"              \n\t"
		" js 1b                         \n\t"
		: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
		: "%"REG_a
	);
#else
	int i;
	for(i=0; i<width; i++)
	{
		/* NOTE: C path truncates the average (>>1) while PAVGB rounds up,
		   so the two paths can differ by 1. */
		dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1;
		dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1;
	}
#endif
}
1766 | |
//this is almost identical to the previous, and exists only because yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses
1768 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) | |
1769 { | |
1770 #ifdef HAVE_MMX | |
1771 asm volatile( | |
1772 "mov %0, %%"REG_a" \n\t" | |
1773 "1: \n\t" | |
1774 "movq (%1, %%"REG_a",2), %%mm0 \n\t" | |
1775 "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" | |
1776 "psrlw $8, %%mm0 \n\t" | |
1777 "psrlw $8, %%mm1 \n\t" | |
1778 "packuswb %%mm1, %%mm0 \n\t" | |
1779 "movq %%mm0, (%2, %%"REG_a") \n\t" | |
1780 "add $8, %%"REG_a" \n\t" | |
1781 " js 1b \n\t" | |
1782 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1783 : "%"REG_a | |
1784 ); | |
1785 #else | |
1786 int i; | |
1787 for(i=0; i<width; i++) | |
1788 dst[i]= src[2*i+1]; | |
1789 #endif | |
1790 } | |
1791 | |
1792 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) | |
1793 { | |
1794 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1795 asm volatile( | |
1796 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1797 "mov %0, %%"REG_a" \n\t" | |
1798 "1: \n\t" | |
1799 "movq (%1, %%"REG_a",4), %%mm0 \n\t" | |
1800 "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" | |
1801 "movq (%2, %%"REG_a",4), %%mm2 \n\t" | |
1802 "movq 8(%2, %%"REG_a",4), %%mm3 \n\t" | |
1803 PAVGB(%%mm2, %%mm0) | |
1804 PAVGB(%%mm3, %%mm1) | |
1805 "pand %%mm4, %%mm0 \n\t" | |
1806 "pand %%mm4, %%mm1 \n\t" | |
1807 "packuswb %%mm1, %%mm0 \n\t" | |
1808 "movq %%mm0, %%mm1 \n\t" | |
1809 "psrlw $8, %%mm0 \n\t" | |
1810 "pand %%mm4, %%mm1 \n\t" | |
1811 "packuswb %%mm0, %%mm0 \n\t" | |
1812 "packuswb %%mm1, %%mm1 \n\t" | |
1813 "movd %%mm0, (%4, %%"REG_a") \n\t" | |
1814 "movd %%mm1, (%3, %%"REG_a") \n\t" | |
1815 "add $4, %%"REG_a" \n\t" | |
1816 " js 1b \n\t" | |
1817 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1818 : "%"REG_a | |
1819 ); | |
1820 #else | |
1821 int i; | |
1822 for(i=0; i<width; i++) | |
1823 { | |
1824 dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1; | |
1825 dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1; | |
1826 } | |
1827 #endif | |
1828 } | |
1829 | |
1830 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) | |
1831 { | |
1832 int i; | |
1833 for(i=0; i<width; i++) | |
1834 { | |
1835 int b= ((uint32_t*)src)[i]&0xFF; | |
1836 int g= (((uint32_t*)src)[i]>>8)&0xFF; | |
1837 int r= (((uint32_t*)src)[i]>>16)&0xFF; | |
1838 | |
1839 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
1840 } | |
1841 } | |
1842 | |
1843 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1844 { | |
1845 int i; | |
1846 for(i=0; i<width; i++) | |
1847 { | |
1848 const int a= ((uint32_t*)src1)[2*i+0]; | |
1849 const int e= ((uint32_t*)src1)[2*i+1]; | |
1850 const int c= ((uint32_t*)src2)[2*i+0]; | |
1851 const int d= ((uint32_t*)src2)[2*i+1]; | |
1852 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF); | |
1853 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00); | |
1854 const int b= l&0x3FF; | |
1855 const int g= h>>8; | |
1856 const int r= l>>16; | |
1857 | |
1858 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1859 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1860 } | |
1861 } | |
1862 | |
1863 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) | |
1864 { | |
1865 #ifdef HAVE_MMX | |
1866 asm volatile( | |
1867 "mov %2, %%"REG_a" \n\t" | |
1868 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |
1869 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
1870 "pxor %%mm7, %%mm7 \n\t" | |
19396 | 1871 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
1872 ASMALIGN(4) |
18861 | 1873 "1: \n\t" |
19396 | 1874 PREFETCH" 64(%0, %%"REG_d") \n\t" |
1875 "movd (%0, %%"REG_d"), %%mm0 \n\t" | |
1876 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" | |
18861 | 1877 "punpcklbw %%mm7, %%mm0 \n\t" |
1878 "punpcklbw %%mm7, %%mm1 \n\t" | |
19396 | 1879 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" |
1880 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1881 "punpcklbw %%mm7, %%mm2 \n\t" |
1882 "punpcklbw %%mm7, %%mm3 \n\t" | |
1883 "pmaddwd %%mm6, %%mm0 \n\t" | |
1884 "pmaddwd %%mm6, %%mm1 \n\t" | |
1885 "pmaddwd %%mm6, %%mm2 \n\t" | |
1886 "pmaddwd %%mm6, %%mm3 \n\t" | |
1887 #ifndef FAST_BGR2YV12 | |
1888 "psrad $8, %%mm0 \n\t" | |
1889 "psrad $8, %%mm1 \n\t" | |
1890 "psrad $8, %%mm2 \n\t" | |
1891 "psrad $8, %%mm3 \n\t" | |
1892 #endif | |
1893 "packssdw %%mm1, %%mm0 \n\t" | |
1894 "packssdw %%mm3, %%mm2 \n\t" | |
1895 "pmaddwd %%mm5, %%mm0 \n\t" | |
1896 "pmaddwd %%mm5, %%mm2 \n\t" | |
1897 "packssdw %%mm2, %%mm0 \n\t" | |
1898 "psraw $7, %%mm0 \n\t" | |
1899 | |
19396 | 1900 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
1901 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" | |
18861 | 1902 "punpcklbw %%mm7, %%mm4 \n\t" |
1903 "punpcklbw %%mm7, %%mm1 \n\t" | |
19396 | 1904 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" |
1905 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1906 "punpcklbw %%mm7, %%mm2 \n\t" |
1907 "punpcklbw %%mm7, %%mm3 \n\t" | |
1908 "pmaddwd %%mm6, %%mm4 \n\t" | |
1909 "pmaddwd %%mm6, %%mm1 \n\t" | |
1910 "pmaddwd %%mm6, %%mm2 \n\t" | |
1911 "pmaddwd %%mm6, %%mm3 \n\t" | |
1912 #ifndef FAST_BGR2YV12 | |
1913 "psrad $8, %%mm4 \n\t" | |
1914 "psrad $8, %%mm1 \n\t" | |
1915 "psrad $8, %%mm2 \n\t" | |
1916 "psrad $8, %%mm3 \n\t" | |
1917 #endif | |
1918 "packssdw %%mm1, %%mm4 \n\t" | |
1919 "packssdw %%mm3, %%mm2 \n\t" | |
1920 "pmaddwd %%mm5, %%mm4 \n\t" | |
1921 "pmaddwd %%mm5, %%mm2 \n\t" | |
19396 | 1922 "add $24, %%"REG_d" \n\t" |
18861 | 1923 "packssdw %%mm2, %%mm4 \n\t" |
1924 "psraw $7, %%mm4 \n\t" | |
1925 | |
1926 "packuswb %%mm4, %%mm0 \n\t" | |
1927 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |
1928 | |
1929 "movq %%mm0, (%1, %%"REG_a") \n\t" | |
1930 "add $8, %%"REG_a" \n\t" | |
1931 " js 1b \n\t" | |
1932 : : "r" (src+width*3), "r" (dst+width), "g" (-width) | |
19396 | 1933 : "%"REG_a, "%"REG_d |
18861 | 1934 ); |
1935 #else | |
1936 int i; | |
1937 for(i=0; i<width; i++) | |
1938 { | |
1939 int b= src[i*3+0]; | |
1940 int g= src[i*3+1]; | |
1941 int r= src[i*3+2]; | |
1942 | |
1943 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
1944 } | |
1945 #endif | |
1946 } | |
1947 | |
1948 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) | |
1949 { | |
1950 #ifdef HAVE_MMX | |
1951 asm volatile( | |
1952 "mov %4, %%"REG_a" \n\t" | |
1953 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
1954 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
1955 "pxor %%mm7, %%mm7 \n\t" | |
19396 | 1956 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" |
1957 "add %%"REG_d", %%"REG_d" \n\t" | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
1958 ASMALIGN(4) |
18861 | 1959 "1: \n\t" |
19396 | 1960 PREFETCH" 64(%0, %%"REG_d") \n\t" |
1961 PREFETCH" 64(%1, %%"REG_d") \n\t" | |
18861 | 1962 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
19396 | 1963 "movq (%0, %%"REG_d"), %%mm0 \n\t" |
1964 "movq (%1, %%"REG_d"), %%mm1 \n\t" | |
1965 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" | |
1966 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1967 PAVGB(%%mm1, %%mm0) |
1968 PAVGB(%%mm3, %%mm2) | |
1969 "movq %%mm0, %%mm1 \n\t" | |
1970 "movq %%mm2, %%mm3 \n\t" | |
1971 "psrlq $24, %%mm0 \n\t" | |
1972 "psrlq $24, %%mm2 \n\t" | |
1973 PAVGB(%%mm1, %%mm0) | |
1974 PAVGB(%%mm3, %%mm2) | |
1975 "punpcklbw %%mm7, %%mm0 \n\t" | |
1976 "punpcklbw %%mm7, %%mm2 \n\t" | |
1977 #else | |
19396 | 1978 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
1979 "movd (%1, %%"REG_d"), %%mm1 \n\t" | |
1980 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" | |
1981 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1982 "punpcklbw %%mm7, %%mm0 \n\t" |
1983 "punpcklbw %%mm7, %%mm1 \n\t" | |
1984 "punpcklbw %%mm7, %%mm2 \n\t" | |
1985 "punpcklbw %%mm7, %%mm3 \n\t" | |
1986 "paddw %%mm1, %%mm0 \n\t" | |
1987 "paddw %%mm3, %%mm2 \n\t" | |
1988 "paddw %%mm2, %%mm0 \n\t" | |
19396 | 1989 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" |
1990 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" | |
1991 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" | |
1992 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 1993 "punpcklbw %%mm7, %%mm4 \n\t" |
1994 "punpcklbw %%mm7, %%mm1 \n\t" | |
1995 "punpcklbw %%mm7, %%mm2 \n\t" | |
1996 "punpcklbw %%mm7, %%mm3 \n\t" | |
1997 "paddw %%mm1, %%mm4 \n\t" | |
1998 "paddw %%mm3, %%mm2 \n\t" | |
1999 "paddw %%mm4, %%mm2 \n\t" | |
2000 "psrlw $2, %%mm0 \n\t" | |
2001 "psrlw $2, %%mm2 \n\t" | |
2002 #endif | |
2003 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |
2004 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
2005 | |
2006 "pmaddwd %%mm0, %%mm1 \n\t" | |
2007 "pmaddwd %%mm2, %%mm3 \n\t" | |
2008 "pmaddwd %%mm6, %%mm0 \n\t" | |
2009 "pmaddwd %%mm6, %%mm2 \n\t" | |
2010 #ifndef FAST_BGR2YV12 | |
2011 "psrad $8, %%mm0 \n\t" | |
2012 "psrad $8, %%mm1 \n\t" | |
2013 "psrad $8, %%mm2 \n\t" | |
2014 "psrad $8, %%mm3 \n\t" | |
2015 #endif | |
2016 "packssdw %%mm2, %%mm0 \n\t" | |
2017 "packssdw %%mm3, %%mm1 \n\t" | |
2018 "pmaddwd %%mm5, %%mm0 \n\t" | |
2019 "pmaddwd %%mm5, %%mm1 \n\t" | |
2020 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
2021 "psraw $7, %%mm0 \n\t" | |
2022 | |
2023 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
19396 | 2024 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
2025 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2026 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2027 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2028 PAVGB(%%mm1, %%mm4) |
2029 PAVGB(%%mm3, %%mm2) | |
2030 "movq %%mm4, %%mm1 \n\t" | |
2031 "movq %%mm2, %%mm3 \n\t" | |
2032 "psrlq $24, %%mm4 \n\t" | |
2033 "psrlq $24, %%mm2 \n\t" | |
2034 PAVGB(%%mm1, %%mm4) | |
2035 PAVGB(%%mm3, %%mm2) | |
2036 "punpcklbw %%mm7, %%mm4 \n\t" | |
2037 "punpcklbw %%mm7, %%mm2 \n\t" | |
2038 #else | |
19396 | 2039 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2040 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2041 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" | |
2042 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2043 "punpcklbw %%mm7, %%mm4 \n\t" |
2044 "punpcklbw %%mm7, %%mm1 \n\t" | |
2045 "punpcklbw %%mm7, %%mm2 \n\t" | |
2046 "punpcklbw %%mm7, %%mm3 \n\t" | |
2047 "paddw %%mm1, %%mm4 \n\t" | |
2048 "paddw %%mm3, %%mm2 \n\t" | |
2049 "paddw %%mm2, %%mm4 \n\t" | |
19396 | 2050 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" |
2051 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" | |
2052 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" | |
2053 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2054 "punpcklbw %%mm7, %%mm5 \n\t" |
2055 "punpcklbw %%mm7, %%mm1 \n\t" | |
2056 "punpcklbw %%mm7, %%mm2 \n\t" | |
2057 "punpcklbw %%mm7, %%mm3 \n\t" | |
2058 "paddw %%mm1, %%mm5 \n\t" | |
2059 "paddw %%mm3, %%mm2 \n\t" | |
2060 "paddw %%mm5, %%mm2 \n\t" | |
2061 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
2062 "psrlw $2, %%mm4 \n\t" | |
2063 "psrlw $2, %%mm2 \n\t" | |
2064 #endif | |
2065 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |
2066 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
2067 | |
2068 "pmaddwd %%mm4, %%mm1 \n\t" | |
2069 "pmaddwd %%mm2, %%mm3 \n\t" | |
2070 "pmaddwd %%mm6, %%mm4 \n\t" | |
2071 "pmaddwd %%mm6, %%mm2 \n\t" | |
2072 #ifndef FAST_BGR2YV12 | |
2073 "psrad $8, %%mm4 \n\t" | |
2074 "psrad $8, %%mm1 \n\t" | |
2075 "psrad $8, %%mm2 \n\t" | |
2076 "psrad $8, %%mm3 \n\t" | |
2077 #endif | |
2078 "packssdw %%mm2, %%mm4 \n\t" | |
2079 "packssdw %%mm3, %%mm1 \n\t" | |
2080 "pmaddwd %%mm5, %%mm4 \n\t" | |
2081 "pmaddwd %%mm5, %%mm1 \n\t" | |
19396 | 2082 "add $24, %%"REG_d" \n\t" |
18861 | 2083 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 |
2084 "psraw $7, %%mm4 \n\t" | |
2085 | |
2086 "movq %%mm0, %%mm1 \n\t" | |
2087 "punpckldq %%mm4, %%mm0 \n\t" | |
2088 "punpckhdq %%mm4, %%mm1 \n\t" | |
2089 "packsswb %%mm1, %%mm0 \n\t" | |
2090 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |
2091 | |
2092 "movd %%mm0, (%2, %%"REG_a") \n\t" | |
2093 "punpckhdq %%mm0, %%mm0 \n\t" | |
2094 "movd %%mm0, (%3, %%"REG_a") \n\t" | |
2095 "add $4, %%"REG_a" \n\t" | |
2096 " js 1b \n\t" | |
2097 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) | |
19396 | 2098 : "%"REG_a, "%"REG_d |
18861 | 2099 ); |
2100 #else | |
2101 int i; | |
2102 for(i=0; i<width; i++) | |
2103 { | |
2104 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2105 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2106 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2107 | |
2108 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2109 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2110 } | |
2111 #endif | |
2112 } | |
2113 | |
2114 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) | |
2115 { | |
2116 int i; | |
2117 for(i=0; i<width; i++) | |
2118 { | |
2119 int d= ((uint16_t*)src)[i]; | |
2120 int b= d&0x1F; | |
2121 int g= (d>>5)&0x3F; | |
2122 int r= (d>>11)&0x1F; | |
2123 | |
2124 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; | |
2125 } | |
2126 } | |
2127 | |
2128 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2129 { | |
2130 int i; | |
2131 for(i=0; i<width; i++) | |
2132 { | |
2133 int d0= ((uint32_t*)src1)[i]; | |
2134 int d1= ((uint32_t*)src2)[i]; | |
2135 | |
2136 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F); | |
2137 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F); | |
2138 | |
2139 int dh2= (dh>>11) + (dh<<21); | |
2140 int d= dh2 + dl; | |
2141 | |
2142 int b= d&0x7F; | |
2143 int r= (d>>11)&0x7F; | |
2144 int g= d>>21; | |
2145 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
2146 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
2147 } | |
2148 } | |
2149 | |
2150 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) | |
2151 { | |
2152 int i; | |
2153 for(i=0; i<width; i++) | |
2154 { | |
2155 int d= ((uint16_t*)src)[i]; | |
2156 int b= d&0x1F; | |
2157 int g= (d>>5)&0x1F; | |
2158 int r= (d>>10)&0x1F; | |
2159 | |
2160 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; | |
2161 } | |
2162 } | |
2163 | |
2164 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2165 { | |
2166 int i; | |
2167 for(i=0; i<width; i++) | |
2168 { | |
2169 int d0= ((uint32_t*)src1)[i]; | |
2170 int d1= ((uint32_t*)src2)[i]; | |
2171 | |
2172 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F); | |
2173 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F); | |
2174 | |
2175 int dh2= (dh>>11) + (dh<<21); | |
2176 int d= dh2 + dl; | |
2177 | |
2178 int b= d&0x7F; | |
2179 int r= (d>>10)&0x7F; | |
2180 int g= d>>21; | |
2181 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
2182 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
2183 } | |
2184 } | |
2185 | |
2186 | |
2187 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) | |
2188 { | |
2189 int i; | |
2190 for(i=0; i<width; i++) | |
2191 { | |
2192 int r= ((uint32_t*)src)[i]&0xFF; | |
2193 int g= (((uint32_t*)src)[i]>>8)&0xFF; | |
2194 int b= (((uint32_t*)src)[i]>>16)&0xFF; | |
2195 | |
2196 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
2197 } | |
2198 } | |
2199 | |
2200 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2201 { | |
2202 int i; | |
2203 for(i=0; i<width; i++) | |
2204 { | |
2205 const int a= ((uint32_t*)src1)[2*i+0]; | |
2206 const int e= ((uint32_t*)src1)[2*i+1]; | |
2207 const int c= ((uint32_t*)src2)[2*i+0]; | |
2208 const int d= ((uint32_t*)src2)[2*i+1]; | |
2209 const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF); | |
2210 const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00); | |
2211 const int r= l&0x3FF; | |
2212 const int g= h>>8; | |
2213 const int b= l>>16; | |
2214 | |
2215 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2216 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2217 } | |
2218 } | |
2219 | |
2220 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) | |
2221 { | |
2222 int i; | |
2223 for(i=0; i<width; i++) | |
2224 { | |
2225 int r= src[i*3+0]; | |
2226 int g= src[i*3+1]; | |
2227 int b= src[i*3+2]; | |
2228 | |
2229 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | |
2230 } | |
2231 } | |
2232 | |
2233 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2234 { | |
2235 int i; | |
2236 for(i=0; i<width; i++) | |
2237 { | |
2238 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2239 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2240 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2241 | |
2242 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2243 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2244 } | |
2245 } | |
2246 | |
2247 | |
2248 // Bilinear / Bicubic scaling | |
2249 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |
2250 int16_t *filter, int16_t *filterPos, long filterSize) | |
2251 { | |
2252 #ifdef HAVE_MMX | |
2253 assert(filterSize % 4 == 0 && filterSize>0); | |
2254 if(filterSize==4) // allways true for upscaling, sometimes for down too | |
2255 { | |
2256 long counter= -2*dstW; | |
2257 filter-= counter*2; | |
2258 filterPos-= counter/2; | |
2259 dst-= counter/2; | |
2260 asm volatile( | |
19396 | 2261 #if defined(PIC) |
2262 "push %%"REG_b" \n\t" | |
2263 #endif | |
18861 | 2264 "pxor %%mm7, %%mm7 \n\t" |
2265 "movq "MANGLE(w02)", %%mm6 \n\t" | |
2266 "push %%"REG_BP" \n\t" // we use 7 regs here ... | |
2267 "mov %%"REG_a", %%"REG_BP" \n\t" | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
2268 ASMALIGN(4) |
18861 | 2269 "1: \n\t" |
2270 "movzwl (%2, %%"REG_BP"), %%eax \n\t" | |
2271 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" | |
2272 "movq (%1, %%"REG_BP", 4), %%mm1\n\t" | |
2273 "movq 8(%1, %%"REG_BP", 4), %%mm3\n\t" | |
2274 "movd (%3, %%"REG_a"), %%mm0 \n\t" | |
2275 "movd (%3, %%"REG_b"), %%mm2 \n\t" | |
2276 "punpcklbw %%mm7, %%mm0 \n\t" | |
2277 "punpcklbw %%mm7, %%mm2 \n\t" | |
2278 "pmaddwd %%mm1, %%mm0 \n\t" | |
2279 "pmaddwd %%mm2, %%mm3 \n\t" | |
2280 "psrad $8, %%mm0 \n\t" | |
2281 "psrad $8, %%mm3 \n\t" | |
2282 "packssdw %%mm3, %%mm0 \n\t" | |
2283 "pmaddwd %%mm6, %%mm0 \n\t" | |
2284 "packssdw %%mm0, %%mm0 \n\t" | |
2285 "movd %%mm0, (%4, %%"REG_BP") \n\t" | |
2286 "add $4, %%"REG_BP" \n\t" | |
2287 " jnc 1b \n\t" | |
2288 | |
2289 "pop %%"REG_BP" \n\t" | |
19396 | 2290 #if defined(PIC) |
2291 "pop %%"REG_b" \n\t" | |
2292 #endif | |
18861 | 2293 : "+a" (counter) |
2294 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
19396 | 2295 #if !defined(PIC) |
18861 | 2296 : "%"REG_b |
19396 | 2297 #endif |
18861 | 2298 ); |
2299 } | |
2300 else if(filterSize==8) | |
2301 { | |
2302 long counter= -2*dstW; | |
2303 filter-= counter*4; | |
2304 filterPos-= counter/2; | |
2305 dst-= counter/2; | |
2306 asm volatile( | |
19396 | 2307 #if defined(PIC) |
2308 "push %%"REG_b" \n\t" | |
2309 #endif | |
18861 | 2310 "pxor %%mm7, %%mm7 \n\t" |
2311 "movq "MANGLE(w02)", %%mm6 \n\t" | |
2312 "push %%"REG_BP" \n\t" // we use 7 regs here ... | |
2313 "mov %%"REG_a", %%"REG_BP" \n\t" | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
2314 ASMALIGN(4) |
18861 | 2315 "1: \n\t" |
2316 "movzwl (%2, %%"REG_BP"), %%eax \n\t" | |
2317 "movzwl 2(%2, %%"REG_BP"), %%ebx\n\t" | |
2318 "movq (%1, %%"REG_BP", 8), %%mm1\n\t" | |
2319 "movq 16(%1, %%"REG_BP", 8), %%mm3\n\t" | |
2320 "movd (%3, %%"REG_a"), %%mm0 \n\t" | |
2321 "movd (%3, %%"REG_b"), %%mm2 \n\t" | |
2322 "punpcklbw %%mm7, %%mm0 \n\t" | |
2323 "punpcklbw %%mm7, %%mm2 \n\t" | |
2324 "pmaddwd %%mm1, %%mm0 \n\t" | |
2325 "pmaddwd %%mm2, %%mm3 \n\t" | |
2326 | |
2327 "movq 8(%1, %%"REG_BP", 8), %%mm1\n\t" | |
2328 "movq 24(%1, %%"REG_BP", 8), %%mm5\n\t" | |
2329 "movd 4(%3, %%"REG_a"), %%mm4 \n\t" | |
2330 "movd 4(%3, %%"REG_b"), %%mm2 \n\t" | |
2331 "punpcklbw %%mm7, %%mm4 \n\t" | |
2332 "punpcklbw %%mm7, %%mm2 \n\t" | |
2333 "pmaddwd %%mm1, %%mm4 \n\t" | |
2334 "pmaddwd %%mm2, %%mm5 \n\t" | |
2335 "paddd %%mm4, %%mm0 \n\t" | |
2336 "paddd %%mm5, %%mm3 \n\t" | |
2337 | |
2338 "psrad $8, %%mm0 \n\t" | |
2339 "psrad $8, %%mm3 \n\t" | |
2340 "packssdw %%mm3, %%mm0 \n\t" | |
2341 "pmaddwd %%mm6, %%mm0 \n\t" | |
2342 "packssdw %%mm0, %%mm0 \n\t" | |
2343 "movd %%mm0, (%4, %%"REG_BP") \n\t" | |
2344 "add $4, %%"REG_BP" \n\t" | |
2345 " jnc 1b \n\t" | |
2346 | |
2347 "pop %%"REG_BP" \n\t" | |
19396 | 2348 #if defined(PIC) |
2349 "pop %%"REG_b" \n\t" | |
2350 #endif | |
18861 | 2351 : "+a" (counter) |
2352 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
19396 | 2353 #if !defined(PIC) |
18861 | 2354 : "%"REG_b |
19396 | 2355 #endif |
18861 | 2356 ); |
2357 } | |
2358 else | |
2359 { | |
2360 uint8_t *offset = src+filterSize; | |
2361 long counter= -2*dstW; | |
2362 // filter-= counter*filterSize/2; | |
2363 filterPos-= counter/2; | |
2364 dst-= counter/2; | |
2365 asm volatile( | |
2366 "pxor %%mm7, %%mm7 \n\t" | |
2367 "movq "MANGLE(w02)", %%mm6 \n\t" | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
2368 ASMALIGN(4) |
18861 | 2369 "1: \n\t" |
2370 "mov %2, %%"REG_c" \n\t" | |
2371 "movzwl (%%"REG_c", %0), %%eax \n\t" | |
19396 | 2372 "movzwl 2(%%"REG_c", %0), %%edx \n\t" |
18861 | 2373 "mov %5, %%"REG_c" \n\t" |
2374 "pxor %%mm4, %%mm4 \n\t" | |
2375 "pxor %%mm5, %%mm5 \n\t" | |
2376 "2: \n\t" | |
2377 "movq (%1), %%mm1 \n\t" | |
2378 "movq (%1, %6), %%mm3 \n\t" | |
2379 "movd (%%"REG_c", %%"REG_a"), %%mm0\n\t" | |
19396 | 2380 "movd (%%"REG_c", %%"REG_d"), %%mm2\n\t" |
18861 | 2381 "punpcklbw %%mm7, %%mm0 \n\t" |
2382 "punpcklbw %%mm7, %%mm2 \n\t" | |
2383 "pmaddwd %%mm1, %%mm0 \n\t" | |
2384 "pmaddwd %%mm2, %%mm3 \n\t" | |
2385 "paddd %%mm3, %%mm5 \n\t" | |
2386 "paddd %%mm0, %%mm4 \n\t" | |
2387 "add $8, %1 \n\t" | |
2388 "add $4, %%"REG_c" \n\t" | |
2389 "cmp %4, %%"REG_c" \n\t" | |
2390 " jb 2b \n\t" | |
2391 "add %6, %1 \n\t" | |
2392 "psrad $8, %%mm4 \n\t" | |
2393 "psrad $8, %%mm5 \n\t" | |
2394 "packssdw %%mm5, %%mm4 \n\t" | |
2395 "pmaddwd %%mm6, %%mm4 \n\t" | |
2396 "packssdw %%mm4, %%mm4 \n\t" | |
2397 "mov %3, %%"REG_a" \n\t" | |
2398 "movd %%mm4, (%%"REG_a", %0) \n\t" | |
2399 "add $4, %0 \n\t" | |
2400 " jnc 1b \n\t" | |
2401 | |
2402 : "+r" (counter), "+r" (filter) | |
2403 : "m" (filterPos), "m" (dst), "m"(offset), | |
2404 "m" (src), "r" (filterSize*2) | |
19396 | 2405 : "%"REG_a, "%"REG_c, "%"REG_d |
18861 | 2406 ); |
2407 } | |
2408 #else | |
2409 #ifdef HAVE_ALTIVEC | |
2410 hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); | |
2411 #else | |
2412 int i; | |
2413 for(i=0; i<dstW; i++) | |
2414 { | |
2415 int j; | |
2416 int srcPos= filterPos[i]; | |
2417 int val=0; | |
2418 // printf("filterPos: %d\n", filterPos[i]); | |
2419 for(j=0; j<filterSize; j++) | |
2420 { | |
2421 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); | |
2422 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; | |
2423 } | |
2424 // filter += hFilterSize; | |
19181 | 2425 dst[i] = FFMIN(FFMAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ... |
18861 | 2426 // dst[i] = val>>7; |
2427 } | |
2428 #endif | |
2429 #endif | |
2430 } | |
2431 // *** horizontal scale Y line to temp buffer | |
2432 static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, | |
2433 int flags, int canMMX2BeUsed, int16_t *hLumFilter, | |
2434 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, | |
2435 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, | |
2436 int32_t *mmx2FilterPos) | |
2437 { | |
2438 if(srcFormat==IMGFMT_YUY2) | |
2439 { | |
2440 RENAME(yuy2ToY)(formatConvBuffer, src, srcW); | |
2441 src= formatConvBuffer; | |
2442 } | |
2443 else if(srcFormat==IMGFMT_UYVY) | |
2444 { | |
2445 RENAME(uyvyToY)(formatConvBuffer, src, srcW); | |
2446 src= formatConvBuffer; | |
2447 } | |
2448 else if(srcFormat==IMGFMT_BGR32) | |
2449 { | |
2450 RENAME(bgr32ToY)(formatConvBuffer, src, srcW); | |
2451 src= formatConvBuffer; | |
2452 } | |
2453 else if(srcFormat==IMGFMT_BGR24) | |
2454 { | |
2455 RENAME(bgr24ToY)(formatConvBuffer, src, srcW); | |
2456 src= formatConvBuffer; | |
2457 } | |
2458 else if(srcFormat==IMGFMT_BGR16) | |
2459 { | |
2460 RENAME(bgr16ToY)(formatConvBuffer, src, srcW); | |
2461 src= formatConvBuffer; | |
2462 } | |
2463 else if(srcFormat==IMGFMT_BGR15) | |
2464 { | |
2465 RENAME(bgr15ToY)(formatConvBuffer, src, srcW); | |
2466 src= formatConvBuffer; | |
2467 } | |
2468 else if(srcFormat==IMGFMT_RGB32) | |
2469 { | |
2470 RENAME(rgb32ToY)(formatConvBuffer, src, srcW); | |
2471 src= formatConvBuffer; | |
2472 } | |
2473 else if(srcFormat==IMGFMT_RGB24) | |
2474 { | |
2475 RENAME(rgb24ToY)(formatConvBuffer, src, srcW); | |
2476 src= formatConvBuffer; | |
2477 } | |
2478 | |
2479 #ifdef HAVE_MMX | |
2480 // use the new MMX scaler if the mmx2 can't be used (its faster than the x86asm one) | |
2481 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) | |
2482 #else | |
2483 if(!(flags&SWS_FAST_BILINEAR)) | |
2484 #endif | |
2485 { | |
2486 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |
2487 } | |
2488 else // Fast Bilinear upscale / crap downscale | |
2489 { | |
2490 #if defined(ARCH_X86) || defined(ARCH_X86_64) | |
2491 #ifdef HAVE_MMX2 | |
2492 int i; | |
19396 | 2493 #if defined(PIC) |
2494 uint64_t ebxsave __attribute__((aligned(8))); | |
2495 #endif | |
18861 | 2496 if(canMMX2BeUsed) |
2497 { | |
2498 asm volatile( | |
19396 | 2499 #if defined(PIC) |
2500 "mov %%"REG_b", %5 \n\t" | |
2501 #endif | |
18861 | 2502 "pxor %%mm7, %%mm7 \n\t" |
2503 "mov %0, %%"REG_c" \n\t" | |
2504 "mov %1, %%"REG_D" \n\t" | |
2505 "mov %2, %%"REG_d" \n\t" | |
2506 "mov %3, %%"REG_b" \n\t" | |
2507 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
2508 PREFETCH" (%%"REG_c") \n\t" | |
2509 PREFETCH" 32(%%"REG_c") \n\t" | |
2510 PREFETCH" 64(%%"REG_c") \n\t" | |
2511 | |
2512 #ifdef ARCH_X86_64 | |
2513 | |
2514 #define FUNNY_Y_CODE \ | |
2515 "movl (%%"REG_b"), %%esi \n\t"\ | |
2516 "call *%4 \n\t"\ | |
2517 "movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\ | |
2518 "add %%"REG_S", %%"REG_c" \n\t"\ | |
2519 "add %%"REG_a", %%"REG_D" \n\t"\ | |
2520 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
2521 | |
2522 #else | |
2523 | |
2524 #define FUNNY_Y_CODE \ | |
2525 "movl (%%"REG_b"), %%esi \n\t"\ | |
2526 "call *%4 \n\t"\ | |
2527 "addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\ | |
2528 "add %%"REG_a", %%"REG_D" \n\t"\ | |
2529 "xor %%"REG_a", %%"REG_a" \n\t"\ | |
2530 | |
2531 #endif | |
2532 | |
2533 FUNNY_Y_CODE | |
2534 FUNNY_Y_CODE | |
2535 FUNNY_Y_CODE | |
2536 FUNNY_Y_CODE | |
2537 FUNNY_Y_CODE | |
2538 FUNNY_Y_CODE | |
2539 FUNNY_Y_CODE | |
2540 FUNNY_Y_CODE | |
2541 | |
19396 | 2542 #if defined(PIC) |
2543 "mov %5, %%"REG_b" \n\t" | |
2544 #endif | |
18861 | 2545 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2546 "m" (funnyYCode) | |
19396 | 2547 #if defined(PIC) |
2548 ,"m" (ebxsave) | |
2549 #endif | |
2550 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D | |
2551 #if !defined(PIC) | |
2552 ,"%"REG_b | |
2553 #endif | |
18861 | 2554 ); |
2555 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; | |
2556 } | |
2557 else | |
2558 { | |
2559 #endif | |
2560 long xInc_shr16 = xInc >> 16; | |
2561 uint16_t xInc_mask = xInc & 0xffff; | |
2562 //NO MMX just normal asm ... | |
2563 asm volatile( | |
2564 "xor %%"REG_a", %%"REG_a" \n\t" // i | |
19396 | 2565 "xor %%"REG_d", %%"REG_d" \n\t" // xx |
18861 | 2566 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
19181
diff
changeset
|
2567 ASMALIGN(4) |
18861 | 2568 "1: \n\t" |
19396 | 2569 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] |
2570 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] | |
18861 | 2571 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2572 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2573 "shll $16, %%edi \n\t" | |
2574 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2575 "mov %1, %%"REG_D" \n\t" | |
2576 "shrl $9, %%esi \n\t" | |
2577 "movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t" | |
2578 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
19396 | 2579 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry |
18861 | 2580 |
19396 | 2581 "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] |
2582 "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] | |
18861 | 2583 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
2584 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2585 "shll $16, %%edi \n\t" | |
2586 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2587 "mov %1, %%"REG_D" \n\t" | |
2588 "shrl $9, %%esi \n\t" | |
2589 "movw %%si, 2(%%"REG_D", %%"REG_a", 2)\n\t" | |
2590 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
19396 | 2591 "adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry |
18861 | 2592 |
2593 | |
2594 "add $2, %%"REG_a" \n\t" | |
2595 "cmp %2, %%"REG_a" \n\t" | |
2596 " jb 1b \n\t" | |
2597 | |
2598 | |
2599 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask) | |
19396 | 2600 : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" |
18861 | 2601 ); |
2602 #ifdef HAVE_MMX2 | |
2603 } //if MMX2 can't be used | |
2604 #endif | |
2605 #else | |
2606 int i; | |
2607 unsigned int xpos=0; | |
2608 for(i=0;i<dstWidth;i++) | |
2609 { | |
2610 register unsigned int xx=xpos>>16; | |
2611 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2612 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; | |
2613 xpos+=xInc; | |
2614 } | |
2615 #endif | |
2616 } | |
2617 } | |
2618 | |
/* RENAME(hcscale)(): horizontally scale one pair of chroma lines (U in src1,
 * V in src2) from srcW input samples to dstWidth output 16-bit intermediates,
 * written to dst (U) and dst+2048 (V).
 *
 * Packed-YUV and RGB/BGR inputs are first converted to 8-bit planar chroma in
 * formatConvBuffer (U at offset 0, V at offset 2048), then src1/src2 are
 * redirected there. Gray input has no chroma, so nothing is written.
 *
 * Three scaling paths:
 *  - generic filter (hScale) when not in fast-bilinear mode (or MMX2 unusable),
 *  - runtime-generated MMX2 code (funnyUVCode) for fast bilinear,
 *  - plain x86 asm / portable C bilinear fallback otherwise. */
inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
				   int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
				   int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
				   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
				   int32_t *mmx2FilterPos)
{
    /* convert non-planar sources to planar 8-bit chroma in formatConvBuffer
       (U plane at +0, V plane at +2048) */
    if(srcFormat==IMGFMT_YUY2)
    {
	RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_UYVY)
    {
	RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR32)
    {
	RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR24)
    {
	RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR16)
    {
	RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_BGR15)
    {
	RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_RGB32)
    {
	RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(srcFormat==IMGFMT_RGB24)
    {
	RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
	src1= formatConvBuffer;
	src2= formatConvBuffer+2048;
    }
    else if(isGray(srcFormat))
    {
	/* gray has no chroma planes -> nothing to scale */
	return;
    }

#ifdef HAVE_MMX
	// use the new MMX scaler if the mmx2 one can't be used (it's faster than the x86asm one)
    if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
    if(!(flags&SWS_FAST_BILINEAR))
#endif
    {
	RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
	RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
    }
    else // Fast Bilinear upscale / crap downscale
    {
#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX2
	int i;
#if defined(PIC)
	/* the asm below clobbers ebx, which PIC builds reserve for the GOT
	   pointer, so it must be saved/restored around the asm block */
	uint64_t ebxsave __attribute__((aligned(8)));
#endif
	if(canMMX2BeUsed)
	{
		/* run the runtime-generated MMX2 scaler (funnyUVCode) 4 times for
		   the U plane, then 4 times for the V plane (dst offset 4096 bytes
		   == 2048 uint16_t samples) */
		asm volatile(
#if defined(PIC)
			"mov %%"REG_b", %6 \n\t"	// save ebx (PIC)
#endif
			"pxor %%mm7, %%mm7 \n\t"
			"mov %0, %%"REG_c" \n\t"
			"mov %1, %%"REG_D" \n\t"
			"mov %2, %%"REG_d" \n\t"
			"mov %3, %%"REG_b" \n\t"
			"xor %%"REG_a", %%"REG_a" \n\t" // i
			PREFETCH" (%%"REG_c") \n\t"
			PREFETCH" 32(%%"REG_c") \n\t"
			PREFETCH" 64(%%"REG_c") \n\t"

#ifdef ARCH_X86_64

#define FUNNY_UV_CODE \
			"movl (%%"REG_b"), %%esi \n\t"\
			"call *%4 \n\t"\
			"movl (%%"REG_b", %%"REG_a"), %%esi\n\t"\
			"add %%"REG_S", %%"REG_c" \n\t"\
			"add %%"REG_a", %%"REG_D" \n\t"\
			"xor %%"REG_a", %%"REG_a" \n\t"\

#else

#define FUNNY_UV_CODE \
			"movl (%%"REG_b"), %%esi \n\t"\
			"call *%4 \n\t"\
			"addl (%%"REG_b", %%"REG_a"), %%"REG_c"\n\t"\
			"add %%"REG_a", %%"REG_D" \n\t"\
			"xor %%"REG_a", %%"REG_a" \n\t"\

#endif

			FUNNY_UV_CODE
			FUNNY_UV_CODE
			FUNNY_UV_CODE
			FUNNY_UV_CODE
			"xor %%"REG_a", %%"REG_a" \n\t" // i
			"mov %5, %%"REG_c" \n\t" // src
			"mov %1, %%"REG_D" \n\t" // buf1
			"add $4096, %%"REG_D" \n\t"	// switch to the V half of dst
			PREFETCH" (%%"REG_c") \n\t"
			PREFETCH" 32(%%"REG_c") \n\t"
			PREFETCH" 64(%%"REG_c") \n\t"

			FUNNY_UV_CODE
			FUNNY_UV_CODE
			FUNNY_UV_CODE
			FUNNY_UV_CODE

#if defined(PIC)
			"mov %6, %%"REG_b" \n\t"	// restore ebx (PIC)
#endif
			:: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
			"m" (funnyUVCode), "m" (src2)
#if defined(PIC)
			,"m" (ebxsave)
#endif
			: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
#if !defined(PIC)
			,"%"REG_b
#endif
		);
		/* the generated code may read beyond srcW-1 near the right edge;
		   overwrite the affected output pixels with the replicated last
		   source sample (<<7 == *128 matches the 16-bit intermediate scale) */
		for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
		{
//			printf("%d %d %d\n", dstWidth, i, srcW);
			dst[i] = src1[srcW-1]*128;
			dst[i+2048] = src2[srcW-1]*128;
		}
	}
	else
	{
#endif
	long xInc_shr16 = (long) (xInc >> 16);
	uint16_t xInc_mask = xInc & 0xffff;
	/* plain x86 bilinear: 16.16 fixed-point position carried in cx (fraction)
	   + REG_d (integer) via addw/adc; both planes interpolated per iteration */
	asm volatile(
		"xor %%"REG_a", %%"REG_a" \n\t" // i
		"xor %%"REG_d", %%"REG_d" \n\t" // xx
		"xorl %%ecx, %%ecx \n\t" // 2*xalpha
		ASMALIGN(4)
		"1: \n\t"
		"mov %0, %%"REG_S" \n\t"
		"movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
		"movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
		"subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
		"imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
		"shll $16, %%edi \n\t"
		"addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
		"mov %1, %%"REG_D" \n\t"
		"shrl $9, %%esi \n\t"
		"movw %%si, (%%"REG_D", %%"REG_a", 2)\n\t"

		"movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
		"movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
		"subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
		"imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
		"shll $16, %%edi \n\t"
		"addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
		"mov %1, %%"REG_D" \n\t"
		"shrl $9, %%esi \n\t"
		"movw %%si, 4096(%%"REG_D", %%"REG_a", 2)\n\t"

		"addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
		"adc %3, %%"REG_d" \n\t" //xx+= xInc>>8 + carry
		"add $1, %%"REG_a" \n\t"
		"cmp %2, %%"REG_a" \n\t"
		" jb 1b \n\t"

/* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
   which is needed to support GCC-4.0 */
#if defined(ARCH_X86_64) && ((__GNUC__ > 3) || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4))
		:: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
#else
		:: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
#endif
		"r" (src2)
		: "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
		);
#ifdef HAVE_MMX2
	} //if MMX2 can't be used
#endif
#else
	/* portable C bilinear fallback: 16.16 fixed-point source position,
	   7-bit interpolation weight (xalpha) */
	int i;
	unsigned int xpos=0;
	for(i=0;i<dstWidth;i++)
	{
		register unsigned int xx=xpos>>16;
		register unsigned int xalpha=(xpos&0xFFFF)>>9;
		dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
		dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* slower
	dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
	dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
*/
		xpos+=xInc;
	}
#endif
    }
}
2839 | |
/* RENAME(swScale)(): main slice-scaling entry point.
 * Scales srcSliceH source lines starting at srcSliceY into dst. Horizontally
 * scaled lines are cached in the lum/chr ring buffers (lumPixBuf/chrPixBuf) so
 * vertical filtering can span slice boundaries; progress state (dstY, buffer
 * indices, last buffered source lines) is persisted in the context between
 * calls. Returns the number of destination lines written by this call. */
static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
			   int srcSliceH, uint8_t* dst[], int dstStride[]){

	/* load a few things into local vars to make the code more readable? and faster */
	const int srcW= c->srcW;
	const int dstW= c->dstW;
	const int dstH= c->dstH;
	const int chrDstW= c->chrDstW;
	const int chrSrcW= c->chrSrcW;
	const int lumXInc= c->lumXInc;
	const int chrXInc= c->chrXInc;
	const int dstFormat= c->dstFormat;
	const int srcFormat= c->srcFormat;
	const int flags= c->flags;
	const int canMMX2BeUsed= c->canMMX2BeUsed;
	int16_t *vLumFilterPos= c->vLumFilterPos;
	int16_t *vChrFilterPos= c->vChrFilterPos;
	int16_t *hLumFilterPos= c->hLumFilterPos;
	int16_t *hChrFilterPos= c->hChrFilterPos;
	int16_t *vLumFilter= c->vLumFilter;
	int16_t *vChrFilter= c->vChrFilter;
	int16_t *hLumFilter= c->hLumFilter;
	int16_t *hChrFilter= c->hChrFilter;
	int32_t *lumMmxFilter= c->lumMmxFilter;
	int32_t *chrMmxFilter= c->chrMmxFilter;
	const int vLumFilterSize= c->vLumFilterSize;
	const int vChrFilterSize= c->vChrFilterSize;
	const int hLumFilterSize= c->hLumFilterSize;
	const int hChrFilterSize= c->hChrFilterSize;
	int16_t **lumPixBuf= c->lumPixBuf;
	int16_t **chrPixBuf= c->chrPixBuf;
	const int vLumBufSize= c->vLumBufSize;
	const int vChrBufSize= c->vChrBufSize;
	uint8_t *funnyYCode= c->funnyYCode;
	uint8_t *funnyUVCode= c->funnyUVCode;
	uint8_t *formatConvBuffer= c->formatConvBuffer;
	const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
	const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); // == ceil(srcSliceH / 2^sub)
	int lastDstY;

	/* vars which will change and which we need to store back in the context */
	int dstY= c->dstY;
	int lumBufIndex= c->lumBufIndex;
	int chrBufIndex= c->chrBufIndex;
	int lastInLumBuf= c->lastInLumBuf;
	int lastInChrBuf= c->lastInChrBuf;

	/* packed formats carry everything in plane 0; mirror it into all three
	   plane slots so the per-plane code below can treat them uniformly */
	if(isPacked(c->srcFormat)){
		src[0]=
		src[1]=
		src[2]= src[0];
		srcStride[0]=
		srcStride[1]=
		srcStride[2]= srcStride[0];
	}
	srcStride[1]<<= c->vChrDrop;	// skip chroma lines when vChrDrop is set
	srcStride[2]<<= c->vChrDrop;

//	printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
//		(int)dst[0], (int)dst[1], (int)dst[2]);

#if 0 //self test FIXME move to a vfilter or something
	{
	static volatile int i=0;
	i++;
	if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH)
		selfTest(src, srcStride, c->srcW, c->srcH);
	i--;
	}
#endif

//printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
//dstStride[0],dstStride[1],dstStride[2]);

	if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
	{
		static int firstTime=1; //FIXME move this into the context perhaps
		if(flags & SWS_PRINT_INFO && firstTime)
		{
			MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n"
				"SwScaler: ->cannot do aligned memory acesses anymore\n");
			firstTime=0;
		}
	}

	/* Note the user might start scaling the picture in the middle so this will not get executed
	   this is not really intended but works currently, so ppl might do it */
	if(srcSliceY ==0){
		lumBufIndex=0;
		chrBufIndex=0;
		dstY=0;
		lastInLumBuf= -1;
		lastInChrBuf= -1;
	}

	lastDstY= dstY;

	for(;dstY < dstH; dstY++){
		unsigned char *dest =dst[0]+dstStride[0]*dstY;
		const int chrDstY= dstY>>c->chrDstVSubSample;
		unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
		unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;

		const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
		const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
		const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
		const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input

		//handle holes (FAST_BILINEAR & weird filters)
		if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
		if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
		ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
		ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)

		// Do we have enough lines in this slice to output the dstY line
		if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
		{
			//Do horizontal scaling of every source line this output line needs
			while(lastInLumBuf < lastLumSrcY)
			{
				uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
				lumBufIndex++;
				ASSERT(lumBufIndex < 2*vLumBufSize)
				ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
				ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
				RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
						flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
						funnyYCode, c->srcFormat, formatConvBuffer,
						c->lumMmx2Filter, c->lumMmx2FilterPos);
				lastInLumBuf++;
			}
			while(lastInChrBuf < lastChrSrcY)
			{
				uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
				uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
				chrBufIndex++;
				ASSERT(chrBufIndex < 2*vChrBufSize)
				ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
				ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
				//FIXME replace parameters through context struct (some at least)

				if(!(isGray(srcFormat) || isGray(dstFormat)))
					RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
						flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
						funnyUVCode, c->srcFormat, formatConvBuffer,
						c->chrMmx2Filter, c->chrMmx2FilterPos);
				lastInChrBuf++;
			}
			//wrap buf index around to stay inside the ring buffer
			if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
			if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
		}
		else // not enough lines left in this slice -> load the rest in the buffer
		{
			//Do horizontal scaling of whatever remains of the slice
			while(lastInLumBuf+1 < srcSliceY + srcSliceH)
			{
				uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
				lumBufIndex++;
				ASSERT(lumBufIndex < 2*vLumBufSize)
				ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
				ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
				RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
						flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
						funnyYCode, c->srcFormat, formatConvBuffer,
						c->lumMmx2Filter, c->lumMmx2FilterPos);
				lastInLumBuf++;
			}
			while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
			{
				uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
				uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
				chrBufIndex++;
				ASSERT(chrBufIndex < 2*vChrBufSize)
				ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
				ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)

				if(!(isGray(srcFormat) || isGray(dstFormat)))
					RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
						flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
						funnyUVCode, c->srcFormat, formatConvBuffer,
						c->chrMmx2Filter, c->chrMmx2FilterPos);
				lastInChrBuf++;
			}
			//wrap buf index around to stay inside the ring buffer
			if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
			if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize;
			break; //we can't output a dstY line so let's try with the next slice
		}

#ifdef HAVE_MMX
		b5Dither= dither8[dstY&1];
		g6Dither= dither4[dstY&1];
		g5Dither= dither8[dstY&1];
		r5Dither= dither8[(dstY+1)&1];
#endif
	    if(dstY < dstH-2)
	    {
		int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
		int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
#ifdef HAVE_MMX
		int i;
		/* build the MMX filter tables consumed by the asm output funcs; the
		   accurate-rounding variant packs pairs of taps per entry, the fast
		   variant replicates each 16-bit coefficient into both halves */
		if(flags & SWS_ACCURATE_RND){
			for(i=0; i<vLumFilterSize; i+=2){
				lumMmxFilter[2*i+0]= lumSrcPtr[i ];
				lumMmxFilter[2*i+1]= lumSrcPtr[i+(vLumFilterSize>1)];
				lumMmxFilter[2*i+2]=
				lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
					+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
			}
			for(i=0; i<vChrFilterSize; i+=2){
				chrMmxFilter[2*i+0]= chrSrcPtr[i ];
				chrMmxFilter[2*i+1]= chrSrcPtr[i+(vChrFilterSize>1)];
				chrMmxFilter[2*i+2]=
				chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
					+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
			}
		}else{
			for(i=0; i<vLumFilterSize; i++)
			{
				lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
				lumMmxFilter[4*i+2]=
				lumMmxFilter[4*i+3]=
					((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
			}
			for(i=0; i<vChrFilterSize; i++)
			{
				chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
				chrMmxFilter[4*i+2]=
				chrMmxFilter[4*i+3]=
					((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
			}
		}
#endif
		if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){
			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
			if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
			RENAME(yuv2nv12X)(c,
				vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
				vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
				dest, uDest, dstW, chrDstW, dstFormat);
		}
		else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like
		{
			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
			if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
			if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
			{
				int16_t *lumBuf = lumPixBuf[0];
				int16_t *chrBuf= chrPixBuf[0];
				RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
			}
			else //General YV12
			{
				RENAME(yuv2yuvX)(c,
					vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
					vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
					dest, uDest, vDest, dstW, chrDstW);
			}
		}
		else
		{
			ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
			ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
			if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
			{
				int chrAlpha= vChrFilter[2*dstY+1];
				RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
						dest, dstW, chrAlpha, dstFormat, flags, dstY);
			}
			else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
			{
				int lumAlpha= vLumFilter[2*dstY+1];
				int chrAlpha= vChrFilter[2*dstY+1];
				lumMmxFilter[2]=
				lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
				chrMmxFilter[2]=
				chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
				RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
						dest, dstW, lumAlpha, chrAlpha, dstY);
			}
			else //General RGB
			{
				RENAME(yuv2packedX)(c,
					vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
					vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
					dest, dstW, dstY);
			}
		}
	    }
	    else // hmm looks like we can't use MMX here without overwriting this array's tail
	    {
		int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
		int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
		if(dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21){
			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
			if(dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
			yuv2nv12XinC(
				vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
				vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
				dest, uDest, dstW, chrDstW, dstFormat);
		}
		else if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12
		{
			const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
			if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
			yuv2yuvXinC(
				vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
				vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
				dest, uDest, vDest, dstW, chrDstW);
		}
		else
		{
			ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
			ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
			yuv2packedXinC(c,
				vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
				vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
				dest, dstW, dstY);
		}
	    }
	}

#ifdef HAVE_MMX
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* store changed local vars back in the context */
	c->dstY= dstY;
	c->lumBufIndex= lumBufIndex;
	c->chrBufIndex= chrBufIndex;
	c->lastInLumBuf= lastInLumBuf;
	c->lastInChrBuf= lastInChrBuf;

	return dstY - lastDstY;
}