Mercurial > mplayer.hg
annotate libswscale/rgb2rgb_template.c @ 23125:48fdbc777870
onboard X200 GPU (shared memory) returns a 0 RAM size (patch by rjoco77@kezdionline.ro)
author | ben |
date | Thu, 26 Apr 2007 18:39:08 +0000 |
parents | beb4ac492c5e |
children | 9528d1ebe68f |
rev | line source |
---|---|
18861 | 1 /* |
2 * | |
3 * rgb2rgb.c, Software RGB to RGB converter | |
4 * plus Software PAL8 to RGB converter | |
5 * Software YUV to YUV converter | |
6 * Software YUV to RGB converter | |
7 * Written by Nick Kurshev. | |
8 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
18861 | 9 * lot of big-endian byteorder fixes by Alex Beregszaszi |
10 * |
11 * This file is part of FFmpeg. |
12 * |
13 * FFmpeg is free software; you can redistribute it and/or modify |
14 * it under the terms of the GNU General Public License as published by |
15 * the Free Software Foundation; either version 2 of the License, or |
16 * (at your option) any later version. |
17 * |
18 * FFmpeg is distributed in the hope that it will be useful, |
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 * GNU General Public License for more details. |
22 * |
23 * You should have received a copy of the GNU General Public License |
24 * along with FFmpeg; if not, write to the Free Software |
25 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
26 * |
27 * the C code (not assembly, mmx, ...) of this file can be used |
28 * under the LGPL license too |
18861 | 29 */ |
30 | |
31 #include <stddef.h> | |
32 #include <inttypes.h> /* for __WORDSIZE */ | |
33 | |
34 #ifndef __WORDSIZE | |
35 // #warning You have a misconfigured system and will probably lose performance! | |
36 #define __WORDSIZE MP_WORDSIZE | |
37 #endif | |
38 | |
39 #undef PREFETCH | |
40 #undef MOVNTQ | |
41 #undef EMMS | |
42 #undef SFENCE | |
43 #undef MMREG_SIZE | |
44 #undef PREFETCHW | |
45 #undef PAVGB | |
46 | |
47 #ifdef HAVE_SSE2 | |
48 #define MMREG_SIZE 16 | |
49 #else | |
50 #define MMREG_SIZE 8 | |
51 #endif | |
52 | |
53 #ifdef HAVE_3DNOW | |
54 #define PREFETCH "prefetch" | |
55 #define PREFETCHW "prefetchw" | |
56 #define PAVGB "pavgusb" | |
57 #elif defined ( HAVE_MMX2 ) | |
58 #define PREFETCH "prefetchnta" | |
59 #define PREFETCHW "prefetcht0" | |
60 #define PAVGB "pavgb" | |
61 #else | |
62 #ifdef __APPLE__ | |
63 #define PREFETCH "#" | |
64 #define PREFETCHW "#" | |
65 #else | |
66 #define PREFETCH " # nop" |
67 #define PREFETCHW " # nop" |
18861 | 68 #endif |
69 #endif | |
70 | |
71 #ifdef HAVE_3DNOW | |
72 /* On K6, femms is faster than emms. On K7, femms is directly mapped to emms. */ | |
73 #define EMMS "femms" | |
74 #else | |
75 #define EMMS "emms" | |
76 #endif | |
77 | |
78 #ifdef HAVE_MMX2 | |
79 #define MOVNTQ "movntq" | |
80 #define SFENCE "sfence" | |
81 #else | |
82 #define MOVNTQ "movq" | |
83 #define SFENCE " # nop" |
18861 | 84 #endif |
85 | |
86 static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,long src_size) | |
87 { | |
88 uint8_t *dest = dst; | |
89 const uint8_t *s = src; | |
90 const uint8_t *end; | |
91 #ifdef HAVE_MMX | |
92 const uint8_t *mm_end; | |
93 #endif | |
94 end = s + src_size; | |
95 #ifdef HAVE_MMX | |
96 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
97 mm_end = end - 23; | |
98 __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); | |
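/* MMX path: 8 pixels per iteration -- 24 source bytes are widened to 32 destination bytes, and mm7 (mask32, defined outside this template) clears the fourth byte of every pixel so alpha ends up zero, matching the C fallback below. */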
99 while(s < mm_end) | |
100 { | |
101 __asm __volatile( | |
102 PREFETCH" 32%1\n\t" | |
103 "movd %1, %%mm0\n\t" | |
104 "punpckldq 3%1, %%mm0\n\t" | |
105 "movd 6%1, %%mm1\n\t" | |
106 "punpckldq 9%1, %%mm1\n\t" | |
107 "movd 12%1, %%mm2\n\t" | |
108 "punpckldq 15%1, %%mm2\n\t" | |
109 "movd 18%1, %%mm3\n\t" | |
110 "punpckldq 21%1, %%mm3\n\t" | |
111 "pand %%mm7, %%mm0\n\t" | |
112 "pand %%mm7, %%mm1\n\t" | |
113 "pand %%mm7, %%mm2\n\t" | |
114 "pand %%mm7, %%mm3\n\t" | |
115 MOVNTQ" %%mm0, %0\n\t" | |
116 MOVNTQ" %%mm1, 8%0\n\t" | |
117 MOVNTQ" %%mm2, 16%0\n\t" | |
118 MOVNTQ" %%mm3, 24%0" | |
119 :"=m"(*dest) | |
120 :"m"(*s) | |
121 :"memory"); | |
122 dest += 32; | |
123 s += 24; | |
124 } | |
125 __asm __volatile(SFENCE:::"memory"); | |
126 __asm __volatile(EMMS:::"memory"); | |
127 #endif | |
128 while(s < end) | |
129 { | |
130 #ifdef WORDS_BIGENDIAN | |
131 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ | |
132 *dest++ = 0; | |
133 *dest++ = s[2]; | |
134 *dest++ = s[1]; | |
135 *dest++ = s[0]; | |
136 s+=3; | |
137 #else | |
138 *dest++ = *s++; | |
139 *dest++ = *s++; | |
140 *dest++ = *s++; | |
141 *dest++ = 0; | |
142 #endif | |
143 } | |
144 } | |
145 | |
146 static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,long src_size) | |
147 { | |
148 uint8_t *dest = dst; | |
149 const uint8_t *s = src; | |
150 const uint8_t *end; | |
151 #ifdef HAVE_MMX | |
152 const uint8_t *mm_end; | |
153 #endif | |
154 end = s + src_size; | |
155 #ifdef HAVE_MMX | |
156 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
157 mm_end = end - 31; | |
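/* MMX path: 8 pixels per iteration -- the alpha byte of each 32-bit pixel is dropped and the remaining RGB bytes are shifted and OR-ed back together (using the mask24* constants defined outside this template) into three tightly packed qwords, 24 output bytes in total. */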
158 while(s < mm_end) | |
159 { | |
160 __asm __volatile( | |
161 PREFETCH" 32%1\n\t" | |
162 "movq %1, %%mm0\n\t" | |
163 "movq 8%1, %%mm1\n\t" | |
164 "movq 16%1, %%mm4\n\t" | |
165 "movq 24%1, %%mm5\n\t" | |
166 "movq %%mm0, %%mm2\n\t" | |
167 "movq %%mm1, %%mm3\n\t" | |
168 "movq %%mm4, %%mm6\n\t" | |
169 "movq %%mm5, %%mm7\n\t" | |
170 "psrlq $8, %%mm2\n\t" | |
171 "psrlq $8, %%mm3\n\t" | |
172 "psrlq $8, %%mm6\n\t" | |
173 "psrlq $8, %%mm7\n\t" | |
174 "pand %2, %%mm0\n\t" | |
175 "pand %2, %%mm1\n\t" | |
176 "pand %2, %%mm4\n\t" | |
177 "pand %2, %%mm5\n\t" | |
178 "pand %3, %%mm2\n\t" | |
179 "pand %3, %%mm3\n\t" | |
180 "pand %3, %%mm6\n\t" | |
181 "pand %3, %%mm7\n\t" | |
182 "por %%mm2, %%mm0\n\t" | |
183 "por %%mm3, %%mm1\n\t" | |
184 "por %%mm6, %%mm4\n\t" | |
185 "por %%mm7, %%mm5\n\t" | |
186 | |
187 "movq %%mm1, %%mm2\n\t" | |
188 "movq %%mm4, %%mm3\n\t" | |
189 "psllq $48, %%mm2\n\t" | |
190 "psllq $32, %%mm3\n\t" | |
191 "pand %4, %%mm2\n\t" | |
192 "pand %5, %%mm3\n\t" | |
193 "por %%mm2, %%mm0\n\t" | |
194 "psrlq $16, %%mm1\n\t" | |
195 "psrlq $32, %%mm4\n\t" | |
196 "psllq $16, %%mm5\n\t" | |
197 "por %%mm3, %%mm1\n\t" | |
198 "pand %6, %%mm5\n\t" | |
199 "por %%mm5, %%mm4\n\t" | |
200 | |
201 MOVNTQ" %%mm0, %0\n\t" | |
202 MOVNTQ" %%mm1, 8%0\n\t" | |
203 MOVNTQ" %%mm4, 16%0" | |
204 :"=m"(*dest) | |
205 :"m"(*s),"m"(mask24l), | |
206 "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
207 :"memory"); | |
208 dest += 24; | |
209 s += 32; | |
210 } | |
211 __asm __volatile(SFENCE:::"memory"); | |
212 __asm __volatile(EMMS:::"memory"); | |
213 #endif | |
214 while(s < end) | |
215 { | |
216 #ifdef WORDS_BIGENDIAN | |
217 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ | |
218 s++; | |
219 dest[2] = *s++; | |
220 dest[1] = *s++; | |
221 dest[0] = *s++; | |
222 dest += 3; | |
223 #else | |
224 *dest++ = *s++; | |
225 *dest++ = *s++; | |
226 *dest++ = *s++; | |
227 s++; | |
228 #endif | |
229 } | |
230 } | |
231 | |
232 /* | |
233 Original by Strepto/Astral | |
234 ported to gcc & bugfixed: A'rpi | |
235 MMX2, 3DNOW optimization by Nick Kurshev | |
236 32-bit C version and the and&add trick by Michael Niedermayer | |
237 */ | |
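/* A minimal sketch of the and&add trick credited above (rgb15to16_one is a
   hypothetical helper, not part of this file): RGB555 stores a pixel as
   0RRRRRGGGGGBBBBB, and after the conversion RGB565 holds RRRRRGGGGG0BBBBB,
   so only the red+green field has to move up by one bit (the new low green
   bit is zero-filled). Adding (x & 0x7FE0) to (x & 0x7FFF) doubles that
   field in place while the five blue bits pass through untouched, which is
   what the scalar loop below does on two pixels at a time packed into a
   32-bit word.

   static inline uint16_t rgb15to16_one(uint16_t x)
   {
       return (x & 0x7FFF) + (x & 0x7FE0); // == (x & 0x1F) | ((x & 0x7FE0) << 1)
   }
*/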
238 static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size) | |
239 { | |
240 register const uint8_t* s=src; | |
241 register uint8_t* d=dst; | |
242 register const uint8_t *end; | |
243 const uint8_t *mm_end; | |
244 end = s + src_size; | |
245 #ifdef HAVE_MMX | |
246 __asm __volatile(PREFETCH" %0"::"m"(*s)); | |
247 __asm __volatile("movq %0, %%mm4"::"m"(mask15s)); | |
248 mm_end = end - 15; | |
249 while(s<mm_end) | |
250 { | |
251 __asm __volatile( | |
252 PREFETCH" 32%1\n\t" | |
253 "movq %1, %%mm0\n\t" | |
254 "movq 8%1, %%mm2\n\t" | |
255 "movq %%mm0, %%mm1\n\t" | |
256 "movq %%mm2, %%mm3\n\t" | |
257 "pand %%mm4, %%mm0\n\t" | |
258 "pand %%mm4, %%mm2\n\t" | |
259 "paddw %%mm1, %%mm0\n\t" | |
260 "paddw %%mm3, %%mm2\n\t" | |
261 MOVNTQ" %%mm0, %0\n\t" | |
262 MOVNTQ" %%mm2, 8%0" | |
263 :"=m"(*d) | |
264 :"m"(*s) | |
265 ); | |
266 d+=16; | |
267 s+=16; | |
268 } | |
269 __asm __volatile(SFENCE:::"memory"); | |
270 __asm __volatile(EMMS:::"memory"); | |
271 #endif | |
272 mm_end = end - 3; | |
273 while(s < mm_end) | |
274 { | |
275 register unsigned x= *((uint32_t *)s); | |
276 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); | |
277 d+=4; | |
278 s+=4; | |
279 } | |
280 if(s < end) | |
281 { | |
282 register unsigned short x= *((uint16_t *)s); | |
283 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); | |
284 } | |
285 } | |
286 | |
287 static inline void RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,long src_size) | |
288 { | |
289 register const uint8_t* s=src; | |
290 register uint8_t* d=dst; | |
291 register const uint8_t *end; | |
292 const uint8_t *mm_end; | |
293 end = s + src_size; | |
294 #ifdef HAVE_MMX | |
295 __asm __volatile(PREFETCH" %0"::"m"(*s)); | |
296 __asm __volatile("movq %0, %%mm7"::"m"(mask15rg)); | |
297 __asm __volatile("movq %0, %%mm6"::"m"(mask15b)); | |
298 mm_end = end - 15; | |
299 while(s<mm_end) | |
300 { | |
301 __asm __volatile( | |
302 PREFETCH" 32%1\n\t" | |
303 "movq %1, %%mm0\n\t" | |
304 "movq 8%1, %%mm2\n\t" | |
305 "movq %%mm0, %%mm1\n\t" | |
306 "movq %%mm2, %%mm3\n\t" | |
307 "psrlq $1, %%mm0\n\t" | |
308 "psrlq $1, %%mm2\n\t" | |
309 "pand %%mm7, %%mm0\n\t" | |
310 "pand %%mm7, %%mm2\n\t" | |
311 "pand %%mm6, %%mm1\n\t" | |
312 "pand %%mm6, %%mm3\n\t" | |
313 "por %%mm1, %%mm0\n\t" | |
314 "por %%mm3, %%mm2\n\t" | |
315 MOVNTQ" %%mm0, %0\n\t" | |
316 MOVNTQ" %%mm2, 8%0" | |
317 :"=m"(*d) | |
318 :"m"(*s) | |
319 ); | |
320 d+=16; | |
321 s+=16; | |
322 } | |
323 __asm __volatile(SFENCE:::"memory"); | |
324 __asm __volatile(EMMS:::"memory"); | |
325 #endif | |
326 mm_end = end - 3; | |
327 while(s < mm_end) | |
328 { | |
329 register uint32_t x= *((uint32_t *)s); | |
330 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); | |
331 s+=4; | |
332 d+=4; | |
333 } | |
334 if(s < end) | |
335 { | |
336 register uint16_t x= *((uint16_t *)s); | |
337 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); | |
338 s+=2; | |
339 d+=2; | |
340 } | |
341 } | |
342 | |
343 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size) | |
344 { | |
345 const uint8_t *s = src; | |
346 const uint8_t *end; | |
347 #ifdef HAVE_MMX | |
348 const uint8_t *mm_end; | |
349 #endif | |
350 uint16_t *d = (uint16_t *)dst; | |
351 end = s + src_size; | |
352 #ifdef HAVE_MMX | |
353 mm_end = end - 15; | |
354 #if 1 //is faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it is slightly faster) | |
355 asm volatile( | |
356 "movq %3, %%mm5 \n\t" | |
357 "movq %4, %%mm6 \n\t" | |
358 "movq %5, %%mm7 \n\t" | |
359 "jmp 2f \n\t" |
360 ASMALIGN(4) |
18861 | 361 "1: \n\t" |
362 PREFETCH" 32(%1) \n\t" | |
363 "movd (%1), %%mm0 \n\t" | |
364 "movd 4(%1), %%mm3 \n\t" | |
365 "punpckldq 8(%1), %%mm0 \n\t" | |
366 "punpckldq 12(%1), %%mm3 \n\t" | |
367 "movq %%mm0, %%mm1 \n\t" | |
368 "movq %%mm3, %%mm4 \n\t" | |
369 "pand %%mm6, %%mm0 \n\t" | |
370 "pand %%mm6, %%mm3 \n\t" | |
371 "pmaddwd %%mm7, %%mm0 \n\t" | |
372 "pmaddwd %%mm7, %%mm3 \n\t" | |
373 "pand %%mm5, %%mm1 \n\t" | |
374 "pand %%mm5, %%mm4 \n\t" | |
375 "por %%mm1, %%mm0 \n\t" | |
376 "por %%mm4, %%mm3 \n\t" | |
377 "psrld $5, %%mm0 \n\t" | |
378 "pslld $11, %%mm3 \n\t" | |
379 "por %%mm3, %%mm0 \n\t" | |
380 MOVNTQ" %%mm0, (%0) \n\t" | |
381 "add $16, %1 \n\t" | |
382 "add $8, %0 \n\t" | |
383 "2: \n\t" |
18861 | 384 "cmp %2, %1 \n\t" |
385 " jb 1b \n\t" | |
386 : "+r" (d), "+r"(s) | |
387 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) | |
388 ); | |
389 #else | |
390 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
391 __asm __volatile( | |
392 "movq %0, %%mm7\n\t" | |
393 "movq %1, %%mm6\n\t" | |
394 ::"m"(red_16mask),"m"(green_16mask)); | |
395 while(s < mm_end) | |
396 { | |
397 __asm __volatile( | |
398 PREFETCH" 32%1\n\t" | |
399 "movd %1, %%mm0\n\t" | |
400 "movd 4%1, %%mm3\n\t" | |
401 "punpckldq 8%1, %%mm0\n\t" | |
402 "punpckldq 12%1, %%mm3\n\t" | |
403 "movq %%mm0, %%mm1\n\t" | |
404 "movq %%mm0, %%mm2\n\t" | |
405 "movq %%mm3, %%mm4\n\t" | |
406 "movq %%mm3, %%mm5\n\t" | |
407 "psrlq $3, %%mm0\n\t" | |
408 "psrlq $3, %%mm3\n\t" | |
409 "pand %2, %%mm0\n\t" | |
410 "pand %2, %%mm3\n\t" | |
411 "psrlq $5, %%mm1\n\t" | |
412 "psrlq $5, %%mm4\n\t" | |
413 "pand %%mm6, %%mm1\n\t" | |
414 "pand %%mm6, %%mm4\n\t" | |
415 "psrlq $8, %%mm2\n\t" | |
416 "psrlq $8, %%mm5\n\t" | |
417 "pand %%mm7, %%mm2\n\t" | |
418 "pand %%mm7, %%mm5\n\t" | |
419 "por %%mm1, %%mm0\n\t" | |
420 "por %%mm4, %%mm3\n\t" | |
421 "por %%mm2, %%mm0\n\t" | |
422 "por %%mm5, %%mm3\n\t" | |
423 "psllq $16, %%mm3\n\t" | |
424 "por %%mm3, %%mm0\n\t" | |
425 MOVNTQ" %%mm0, %0\n\t" | |
426 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
427 d += 4; | |
428 s += 16; | |
429 } | |
430 #endif | |
431 __asm __volatile(SFENCE:::"memory"); | |
432 __asm __volatile(EMMS:::"memory"); | |
433 #endif | |
434 while(s < end) | |
435 { | |
436 register int rgb = *(uint32_t*)s; s += 4; | |
437 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); | |
438 } | |
439 } | |
440 | |
441 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) | |
442 { | |
443 const uint8_t *s = src; | |
444 const uint8_t *end; | |
445 #ifdef HAVE_MMX | |
446 const uint8_t *mm_end; | |
447 #endif | |
448 uint16_t *d = (uint16_t *)dst; | |
449 end = s + src_size; | |
450 #ifdef HAVE_MMX | |
451 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
452 __asm __volatile( | |
453 "movq %0, %%mm7\n\t" | |
454 "movq %1, %%mm6\n\t" | |
455 ::"m"(red_16mask),"m"(green_16mask)); | |
456 mm_end = end - 15; | |
457 while(s < mm_end) | |
458 { | |
459 __asm __volatile( | |
460 PREFETCH" 32%1\n\t" | |
461 "movd %1, %%mm0\n\t" | |
462 "movd 4%1, %%mm3\n\t" | |
463 "punpckldq 8%1, %%mm0\n\t" | |
464 "punpckldq 12%1, %%mm3\n\t" | |
465 "movq %%mm0, %%mm1\n\t" | |
466 "movq %%mm0, %%mm2\n\t" | |
467 "movq %%mm3, %%mm4\n\t" | |
468 "movq %%mm3, %%mm5\n\t" | |
469 "psllq $8, %%mm0\n\t" | |
470 "psllq $8, %%mm3\n\t" | |
471 "pand %%mm7, %%mm0\n\t" | |
472 "pand %%mm7, %%mm3\n\t" | |
473 "psrlq $5, %%mm1\n\t" | |
474 "psrlq $5, %%mm4\n\t" | |
475 "pand %%mm6, %%mm1\n\t" | |
476 "pand %%mm6, %%mm4\n\t" | |
477 "psrlq $19, %%mm2\n\t" | |
478 "psrlq $19, %%mm5\n\t" | |
479 "pand %2, %%mm2\n\t" | |
480 "pand %2, %%mm5\n\t" | |
481 "por %%mm1, %%mm0\n\t" | |
482 "por %%mm4, %%mm3\n\t" | |
483 "por %%mm2, %%mm0\n\t" | |
484 "por %%mm5, %%mm3\n\t" | |
485 "psllq $16, %%mm3\n\t" | |
486 "por %%mm3, %%mm0\n\t" | |
487 MOVNTQ" %%mm0, %0\n\t" | |
488 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
489 d += 4; | |
490 s += 16; | |
491 } | |
492 __asm __volatile(SFENCE:::"memory"); | |
493 __asm __volatile(EMMS:::"memory"); | |
494 #endif | |
495 while(s < end) | |
496 { | |
497 register int rgb = *(uint32_t*)s; s += 4; | |
498 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); | |
499 } | |
500 } | |
501 | |
502 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size) | |
503 { | |
504 const uint8_t *s = src; | |
505 const uint8_t *end; | |
506 #ifdef HAVE_MMX | |
507 const uint8_t *mm_end; | |
508 #endif | |
509 uint16_t *d = (uint16_t *)dst; | |
510 end = s + src_size; | |
511 #ifdef HAVE_MMX | |
512 mm_end = end - 15; | |
513 #if 1 //is faster only if multiplies are reasonably fast (FIXME: figure out on which CPUs this is faster; on Athlon it is slightly faster) | |
514 asm volatile( | |
515 "movq %3, %%mm5 \n\t" | |
516 "movq %4, %%mm6 \n\t" | |
517 "movq %5, %%mm7 \n\t" | |
518 "jmp 2f \n\t" |
519 ASMALIGN(4) |
18861 | 520 "1: \n\t" |
521 PREFETCH" 32(%1) \n\t" | |
522 "movd (%1), %%mm0 \n\t" | |
523 "movd 4(%1), %%mm3 \n\t" | |
524 "punpckldq 8(%1), %%mm0 \n\t" | |
525 "punpckldq 12(%1), %%mm3 \n\t" | |
526 "movq %%mm0, %%mm1 \n\t" | |
527 "movq %%mm3, %%mm4 \n\t" | |
528 "pand %%mm6, %%mm0 \n\t" | |
529 "pand %%mm6, %%mm3 \n\t" | |
530 "pmaddwd %%mm7, %%mm0 \n\t" | |
531 "pmaddwd %%mm7, %%mm3 \n\t" | |
532 "pand %%mm5, %%mm1 \n\t" | |
533 "pand %%mm5, %%mm4 \n\t" | |
534 "por %%mm1, %%mm0 \n\t" | |
535 "por %%mm4, %%mm3 \n\t" | |
536 "psrld $6, %%mm0 \n\t" | |
537 "pslld $10, %%mm3 \n\t" | |
538 "por %%mm3, %%mm0 \n\t" | |
539 MOVNTQ" %%mm0, (%0) \n\t" | |
540 "add $16, %1 \n\t" | |
541 "add $8, %0 \n\t" | |
542 "2: \n\t" |
18861 | 543 "cmp %2, %1 \n\t" |
544 " jb 1b \n\t" | |
545 : "+r" (d), "+r"(s) | |
546 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) | |
547 ); | |
548 #else | |
549 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
550 __asm __volatile( | |
551 "movq %0, %%mm7\n\t" | |
552 "movq %1, %%mm6\n\t" | |
553 ::"m"(red_15mask),"m"(green_15mask)); | |
554 while(s < mm_end) | |
555 { | |
556 __asm __volatile( | |
557 PREFETCH" 32%1\n\t" | |
558 "movd %1, %%mm0\n\t" | |
559 "movd 4%1, %%mm3\n\t" | |
560 "punpckldq 8%1, %%mm0\n\t" | |
561 "punpckldq 12%1, %%mm3\n\t" | |
562 "movq %%mm0, %%mm1\n\t" | |
563 "movq %%mm0, %%mm2\n\t" | |
564 "movq %%mm3, %%mm4\n\t" | |
565 "movq %%mm3, %%mm5\n\t" | |
566 "psrlq $3, %%mm0\n\t" | |
567 "psrlq $3, %%mm3\n\t" | |
568 "pand %2, %%mm0\n\t" | |
569 "pand %2, %%mm3\n\t" | |
570 "psrlq $6, %%mm1\n\t" | |
571 "psrlq $6, %%mm4\n\t" | |
572 "pand %%mm6, %%mm1\n\t" | |
573 "pand %%mm6, %%mm4\n\t" | |
574 "psrlq $9, %%mm2\n\t" | |
575 "psrlq $9, %%mm5\n\t" | |
576 "pand %%mm7, %%mm2\n\t" | |
577 "pand %%mm7, %%mm5\n\t" | |
578 "por %%mm1, %%mm0\n\t" | |
579 "por %%mm4, %%mm3\n\t" | |
580 "por %%mm2, %%mm0\n\t" | |
581 "por %%mm5, %%mm3\n\t" | |
582 "psllq $16, %%mm3\n\t" | |
583 "por %%mm3, %%mm0\n\t" | |
584 MOVNTQ" %%mm0, %0\n\t" | |
585 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
586 d += 4; | |
587 s += 16; | |
588 } | |
589 #endif | |
590 __asm __volatile(SFENCE:::"memory"); | |
591 __asm __volatile(EMMS:::"memory"); | |
592 #endif | |
593 while(s < end) | |
594 { | |
595 register int rgb = *(uint32_t*)s; s += 4; | |
596 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); | |
597 } | |
598 } | |
599 | |
600 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) | |
601 { | |
602 const uint8_t *s = src; | |
603 const uint8_t *end; | |
604 #ifdef HAVE_MMX | |
605 const uint8_t *mm_end; | |
606 #endif | |
607 uint16_t *d = (uint16_t *)dst; | |
608 end = s + src_size; | |
609 #ifdef HAVE_MMX | |
610 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
611 __asm __volatile( | |
612 "movq %0, %%mm7\n\t" | |
613 "movq %1, %%mm6\n\t" | |
614 ::"m"(red_15mask),"m"(green_15mask)); | |
615 mm_end = end - 15; | |
616 while(s < mm_end) | |
617 { | |
618 __asm __volatile( | |
619 PREFETCH" 32%1\n\t" | |
620 "movd %1, %%mm0\n\t" | |
621 "movd 4%1, %%mm3\n\t" | |
622 "punpckldq 8%1, %%mm0\n\t" | |
623 "punpckldq 12%1, %%mm3\n\t" | |
624 "movq %%mm0, %%mm1\n\t" | |
625 "movq %%mm0, %%mm2\n\t" | |
626 "movq %%mm3, %%mm4\n\t" | |
627 "movq %%mm3, %%mm5\n\t" | |
628 "psllq $7, %%mm0\n\t" | |
629 "psllq $7, %%mm3\n\t" | |
630 "pand %%mm7, %%mm0\n\t" | |
631 "pand %%mm7, %%mm3\n\t" | |
632 "psrlq $6, %%mm1\n\t" | |
633 "psrlq $6, %%mm4\n\t" | |
634 "pand %%mm6, %%mm1\n\t" | |
635 "pand %%mm6, %%mm4\n\t" | |
636 "psrlq $19, %%mm2\n\t" | |
637 "psrlq $19, %%mm5\n\t" | |
638 "pand %2, %%mm2\n\t" | |
639 "pand %2, %%mm5\n\t" | |
640 "por %%mm1, %%mm0\n\t" | |
641 "por %%mm4, %%mm3\n\t" | |
642 "por %%mm2, %%mm0\n\t" | |
643 "por %%mm5, %%mm3\n\t" | |
644 "psllq $16, %%mm3\n\t" | |
645 "por %%mm3, %%mm0\n\t" | |
646 MOVNTQ" %%mm0, %0\n\t" | |
647 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
648 d += 4; | |
649 s += 16; | |
650 } | |
651 __asm __volatile(SFENCE:::"memory"); | |
652 __asm __volatile(EMMS:::"memory"); | |
653 #endif | |
654 while(s < end) | |
655 { | |
656 register int rgb = *(uint32_t*)s; s += 4; | |
657 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); | |
658 } | |
659 } | |
660 | |
661 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size) | |
662 { | |
663 const uint8_t *s = src; | |
664 const uint8_t *end; | |
665 #ifdef HAVE_MMX | |
666 const uint8_t *mm_end; | |
667 #endif | |
668 uint16_t *d = (uint16_t *)dst; | |
669 end = s + src_size; | |
670 #ifdef HAVE_MMX | |
671 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
672 __asm __volatile( | |
673 "movq %0, %%mm7\n\t" | |
674 "movq %1, %%mm6\n\t" | |
675 ::"m"(red_16mask),"m"(green_16mask)); | |
676 mm_end = end - 11; | |
677 while(s < mm_end) | |
678 { | |
679 __asm __volatile( | |
680 PREFETCH" 32%1\n\t" | |
681 "movd %1, %%mm0\n\t" | |
682 "movd 3%1, %%mm3\n\t" | |
683 "punpckldq 6%1, %%mm0\n\t" | |
684 "punpckldq 9%1, %%mm3\n\t" | |
685 "movq %%mm0, %%mm1\n\t" | |
686 "movq %%mm0, %%mm2\n\t" | |
687 "movq %%mm3, %%mm4\n\t" | |
688 "movq %%mm3, %%mm5\n\t" | |
689 "psrlq $3, %%mm0\n\t" | |
690 "psrlq $3, %%mm3\n\t" | |
691 "pand %2, %%mm0\n\t" | |
692 "pand %2, %%mm3\n\t" | |
693 "psrlq $5, %%mm1\n\t" | |
694 "psrlq $5, %%mm4\n\t" | |
695 "pand %%mm6, %%mm1\n\t" | |
696 "pand %%mm6, %%mm4\n\t" | |
697 "psrlq $8, %%mm2\n\t" | |
698 "psrlq $8, %%mm5\n\t" | |
699 "pand %%mm7, %%mm2\n\t" | |
700 "pand %%mm7, %%mm5\n\t" | |
701 "por %%mm1, %%mm0\n\t" | |
702 "por %%mm4, %%mm3\n\t" | |
703 "por %%mm2, %%mm0\n\t" | |
704 "por %%mm5, %%mm3\n\t" | |
705 "psllq $16, %%mm3\n\t" | |
706 "por %%mm3, %%mm0\n\t" | |
707 MOVNTQ" %%mm0, %0\n\t" | |
708 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
709 d += 4; | |
710 s += 12; | |
711 } | |
712 __asm __volatile(SFENCE:::"memory"); | |
713 __asm __volatile(EMMS:::"memory"); | |
714 #endif | |
715 while(s < end) | |
716 { | |
717 const int b= *s++; | |
718 const int g= *s++; | |
719 const int r= *s++; | |
720 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |
721 } | |
722 } | |
723 | |
724 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size) | |
725 { | |
726 const uint8_t *s = src; | |
727 const uint8_t *end; | |
728 #ifdef HAVE_MMX | |
729 const uint8_t *mm_end; | |
730 #endif | |
731 uint16_t *d = (uint16_t *)dst; | |
732 end = s + src_size; | |
733 #ifdef HAVE_MMX | |
734 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
735 __asm __volatile( | |
736 "movq %0, %%mm7\n\t" | |
737 "movq %1, %%mm6\n\t" | |
738 ::"m"(red_16mask),"m"(green_16mask)); | |
739 mm_end = end - 15; | |
740 while(s < mm_end) | |
741 { | |
742 __asm __volatile( | |
743 PREFETCH" 32%1\n\t" | |
744 "movd %1, %%mm0\n\t" | |
745 "movd 3%1, %%mm3\n\t" | |
746 "punpckldq 6%1, %%mm0\n\t" | |
747 "punpckldq 9%1, %%mm3\n\t" | |
748 "movq %%mm0, %%mm1\n\t" | |
749 "movq %%mm0, %%mm2\n\t" | |
750 "movq %%mm3, %%mm4\n\t" | |
751 "movq %%mm3, %%mm5\n\t" | |
752 "psllq $8, %%mm0\n\t" | |
753 "psllq $8, %%mm3\n\t" | |
754 "pand %%mm7, %%mm0\n\t" | |
755 "pand %%mm7, %%mm3\n\t" | |
756 "psrlq $5, %%mm1\n\t" | |
757 "psrlq $5, %%mm4\n\t" | |
758 "pand %%mm6, %%mm1\n\t" | |
759 "pand %%mm6, %%mm4\n\t" | |
760 "psrlq $19, %%mm2\n\t" | |
761 "psrlq $19, %%mm5\n\t" | |
762 "pand %2, %%mm2\n\t" | |
763 "pand %2, %%mm5\n\t" | |
764 "por %%mm1, %%mm0\n\t" | |
765 "por %%mm4, %%mm3\n\t" | |
766 "por %%mm2, %%mm0\n\t" | |
767 "por %%mm5, %%mm3\n\t" | |
768 "psllq $16, %%mm3\n\t" | |
769 "por %%mm3, %%mm0\n\t" | |
770 MOVNTQ" %%mm0, %0\n\t" | |
771 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); | |
772 d += 4; | |
773 s += 12; | |
774 } | |
775 __asm __volatile(SFENCE:::"memory"); | |
776 __asm __volatile(EMMS:::"memory"); | |
777 #endif | |
778 while(s < end) | |
779 { | |
780 const int r= *s++; | |
781 const int g= *s++; | |
782 const int b= *s++; | |
783 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | |
784 } | |
785 } | |
786 | |
787 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size) | |
788 { | |
789 const uint8_t *s = src; | |
790 const uint8_t *end; | |
791 #ifdef HAVE_MMX | |
792 const uint8_t *mm_end; | |
793 #endif | |
794 uint16_t *d = (uint16_t *)dst; | |
795 end = s + src_size; | |
796 #ifdef HAVE_MMX | |
797 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
798 __asm __volatile( | |
799 "movq %0, %%mm7\n\t" | |
800 "movq %1, %%mm6\n\t" | |
801 ::"m"(red_15mask),"m"(green_15mask)); | |
802 mm_end = end - 11; | |
803 while(s < mm_end) | |
804 { | |
805 __asm __volatile( | |
806 PREFETCH" 32%1\n\t" | |
807 "movd %1, %%mm0\n\t" | |
808 "movd 3%1, %%mm3\n\t" | |
809 "punpckldq 6%1, %%mm0\n\t" | |
810 "punpckldq 9%1, %%mm3\n\t" | |
811 "movq %%mm0, %%mm1\n\t" | |
812 "movq %%mm0, %%mm2\n\t" | |
813 "movq %%mm3, %%mm4\n\t" | |
814 "movq %%mm3, %%mm5\n\t" | |
815 "psrlq $3, %%mm0\n\t" | |
816 "psrlq $3, %%mm3\n\t" | |
817 "pand %2, %%mm0\n\t" | |
818 "pand %2, %%mm3\n\t" | |
819 "psrlq $6, %%mm1\n\t" | |
820 "psrlq $6, %%mm4\n\t" | |
821 "pand %%mm6, %%mm1\n\t" | |
822 "pand %%mm6, %%mm4\n\t" | |
823 "psrlq $9, %%mm2\n\t" | |
824 "psrlq $9, %%mm5\n\t" | |
825 "pand %%mm7, %%mm2\n\t" | |
826 "pand %%mm7, %%mm5\n\t" | |
827 "por %%mm1, %%mm0\n\t" | |
828 "por %%mm4, %%mm3\n\t" | |
829 "por %%mm2, %%mm0\n\t" | |
830 "por %%mm5, %%mm3\n\t" | |
831 "psllq $16, %%mm3\n\t" | |
832 "por %%mm3, %%mm0\n\t" | |
833 MOVNTQ" %%mm0, %0\n\t" | |
834 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
835 d += 4; | |
836 s += 12; | |
837 } | |
838 __asm __volatile(SFENCE:::"memory"); | |
839 __asm __volatile(EMMS:::"memory"); | |
840 #endif | |
841 while(s < end) | |
842 { | |
843 const int b= *s++; | |
844 const int g= *s++; | |
845 const int r= *s++; | |
846 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |
847 } | |
848 } | |
849 | |
850 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size) | |
851 { | |
852 const uint8_t *s = src; | |
853 const uint8_t *end; | |
854 #ifdef HAVE_MMX | |
855 const uint8_t *mm_end; | |
856 #endif | |
857 uint16_t *d = (uint16_t *)dst; | |
858 end = s + src_size; | |
859 #ifdef HAVE_MMX | |
860 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); | |
861 __asm __volatile( | |
862 "movq %0, %%mm7\n\t" | |
863 "movq %1, %%mm6\n\t" | |
864 ::"m"(red_15mask),"m"(green_15mask)); | |
865 mm_end = end - 15; | |
866 while(s < mm_end) | |
867 { | |
868 __asm __volatile( | |
869 PREFETCH" 32%1\n\t" | |
870 "movd %1, %%mm0\n\t" | |
871 "movd 3%1, %%mm3\n\t" | |
872 "punpckldq 6%1, %%mm0\n\t" | |
873 "punpckldq 9%1, %%mm3\n\t" | |
874 "movq %%mm0, %%mm1\n\t" | |
875 "movq %%mm0, %%mm2\n\t" | |
876 "movq %%mm3, %%mm4\n\t" | |
877 "movq %%mm3, %%mm5\n\t" | |
878 "psllq $7, %%mm0\n\t" | |
879 "psllq $7, %%mm3\n\t" | |
880 "pand %%mm7, %%mm0\n\t" | |
881 "pand %%mm7, %%mm3\n\t" | |
882 "psrlq $6, %%mm1\n\t" | |
883 "psrlq $6, %%mm4\n\t" | |
884 "pand %%mm6, %%mm1\n\t" | |
885 "pand %%mm6, %%mm4\n\t" | |
886 "psrlq $19, %%mm2\n\t" | |
887 "psrlq $19, %%mm5\n\t" | |
888 "pand %2, %%mm2\n\t" | |
889 "pand %2, %%mm5\n\t" | |
890 "por %%mm1, %%mm0\n\t" | |
891 "por %%mm4, %%mm3\n\t" | |
892 "por %%mm2, %%mm0\n\t" | |
893 "por %%mm5, %%mm3\n\t" | |
894 "psllq $16, %%mm3\n\t" | |
895 "por %%mm3, %%mm0\n\t" | |
896 MOVNTQ" %%mm0, %0\n\t" | |
897 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); | |
898 d += 4; | |
899 s += 12; | |
900 } | |
901 __asm __volatile(SFENCE:::"memory"); | |
902 __asm __volatile(EMMS:::"memory"); | |
903 #endif | |
904 while(s < end) | |
905 { | |
906 const int r= *s++; | |
907 const int g= *s++; | |
908 const int b= *s++; | |
909 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | |
910 } | |
911 } | |
912 | |
913 /* | |
914 I use a less accurate approximation here by simply | |
915 left-shifting the input | |
916 value and filling the low-order bits with | |
917 zeroes. This method improves PNG | |
918 compression, but this scheme cannot reproduce white exactly, since it does not | |
919 generate an all-ones maximum value; the net effect is to darken the | |
920 image slightly. | |
921 | |
922 A better method would be "left bit replication": | |
923 | |
924 4 3 2 1 0 | |
925 --------- | |
926 1 1 0 1 1 | |
927 | |
928 7 6 5 4 3 2 1 0 | |
929 ---------------- | |
930 1 1 0 1 1 1 1 0 | |
931 |=======| |===| | |
932 | Leftmost Bits Repeated to Fill Open Bits | |
933 | | |
934 Original Bits | |
935 */ | |
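/* A minimal sketch of the "left bit replication" described above (expand5to8
   is a hypothetical helper, not used by this file): the top bits of the 5-bit
   value are repeated into the low-order bits of the 8-bit result, so 0x1F
   expands to 0xFF and full white is preserved, unlike the plain left shift
   used by the code below.

   static inline uint8_t expand5to8(uint8_t v)  // v in [0, 31]
   {
       return (uint8_t)((v << 3) | (v >> 2));   // 11011 -> 11011110, as in the diagram
   }
*/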
936 static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size) | |
937 { | |
938 const uint16_t *end; | |
939 #ifdef HAVE_MMX | |
940 const uint16_t *mm_end; | |
941 #endif | |
942 uint8_t *d = (uint8_t *)dst; | |
943 const uint16_t *s = (uint16_t *)src; | |
944 end = s + src_size/2; | |
945 #ifdef HAVE_MMX | |
946 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
947 mm_end = end - 7; | |
948 while(s < mm_end) | |
949 { | |
950 __asm __volatile( | |
951 PREFETCH" 32%1\n\t" | |
952 "movq %1, %%mm0\n\t" | |
953 "movq %1, %%mm1\n\t" | |
954 "movq %1, %%mm2\n\t" | |
955 "pand %2, %%mm0\n\t" | |
956 "pand %3, %%mm1\n\t" | |
957 "pand %4, %%mm2\n\t" | |
958 "psllq $3, %%mm0\n\t" | |
959 "psrlq $2, %%mm1\n\t" | |
960 "psrlq $7, %%mm2\n\t" | |
961 "movq %%mm0, %%mm3\n\t" | |
962 "movq %%mm1, %%mm4\n\t" | |
963 "movq %%mm2, %%mm5\n\t" | |
964 "punpcklwd %5, %%mm0\n\t" | |
965 "punpcklwd %5, %%mm1\n\t" | |
966 "punpcklwd %5, %%mm2\n\t" | |
967 "punpckhwd %5, %%mm3\n\t" | |
968 "punpckhwd %5, %%mm4\n\t" | |
969 "punpckhwd %5, %%mm5\n\t" | |
970 "psllq $8, %%mm1\n\t" | |
971 "psllq $16, %%mm2\n\t" | |
972 "por %%mm1, %%mm0\n\t" | |
973 "por %%mm2, %%mm0\n\t" | |
974 "psllq $8, %%mm4\n\t" | |
975 "psllq $16, %%mm5\n\t" | |
976 "por %%mm4, %%mm3\n\t" | |
977 "por %%mm5, %%mm3\n\t" | |
978 | |
979 "movq %%mm0, %%mm6\n\t" | |
980 "movq %%mm3, %%mm7\n\t" | |
981 | |
982 "movq 8%1, %%mm0\n\t" | |
983 "movq 8%1, %%mm1\n\t" | |
984 "movq 8%1, %%mm2\n\t" | |
985 "pand %2, %%mm0\n\t" | |
986 "pand %3, %%mm1\n\t" | |
987 "pand %4, %%mm2\n\t" | |
988 "psllq $3, %%mm0\n\t" | |
989 "psrlq $2, %%mm1\n\t" | |
990 "psrlq $7, %%mm2\n\t" | |
991 "movq %%mm0, %%mm3\n\t" | |
992 "movq %%mm1, %%mm4\n\t" | |
993 "movq %%mm2, %%mm5\n\t" | |
994 "punpcklwd %5, %%mm0\n\t" | |
995 "punpcklwd %5, %%mm1\n\t" | |
996 "punpcklwd %5, %%mm2\n\t" | |
997 "punpckhwd %5, %%mm3\n\t" | |
998 "punpckhwd %5, %%mm4\n\t" | |
999 "punpckhwd %5, %%mm5\n\t" | |
1000 "psllq $8, %%mm1\n\t" | |
1001 "psllq $16, %%mm2\n\t" | |
1002 "por %%mm1, %%mm0\n\t" | |
1003 "por %%mm2, %%mm0\n\t" | |
1004 "psllq $8, %%mm4\n\t" | |
1005 "psllq $16, %%mm5\n\t" | |
1006 "por %%mm4, %%mm3\n\t" | |
1007 "por %%mm5, %%mm3\n\t" | |
1008 | |
1009 :"=m"(*d) | |
1010 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) | |
1011 :"memory"); | |
1012 /* Borrowed 32 to 24 */ | |
1013 __asm __volatile( | |
1014 "movq %%mm0, %%mm4\n\t" | |
1015 "movq %%mm3, %%mm5\n\t" | |
1016 "movq %%mm6, %%mm0\n\t" | |
1017 "movq %%mm7, %%mm1\n\t" | |
1018 | |
1019 "movq %%mm4, %%mm6\n\t" | |
1020 "movq %%mm5, %%mm7\n\t" | |
1021 "movq %%mm0, %%mm2\n\t" | |
1022 "movq %%mm1, %%mm3\n\t" | |
1023 | |
1024 "psrlq $8, %%mm2\n\t" | |
1025 "psrlq $8, %%mm3\n\t" | |
1026 "psrlq $8, %%mm6\n\t" | |
1027 "psrlq $8, %%mm7\n\t" | |
1028 "pand %2, %%mm0\n\t" | |
1029 "pand %2, %%mm1\n\t" | |
1030 "pand %2, %%mm4\n\t" | |
1031 "pand %2, %%mm5\n\t" | |
1032 "pand %3, %%mm2\n\t" | |
1033 "pand %3, %%mm3\n\t" | |
1034 "pand %3, %%mm6\n\t" | |
1035 "pand %3, %%mm7\n\t" | |
1036 "por %%mm2, %%mm0\n\t" | |
1037 "por %%mm3, %%mm1\n\t" | |
1038 "por %%mm6, %%mm4\n\t" | |
1039 "por %%mm7, %%mm5\n\t" | |
1040 | |
1041 "movq %%mm1, %%mm2\n\t" | |
1042 "movq %%mm4, %%mm3\n\t" | |
1043 "psllq $48, %%mm2\n\t" | |
1044 "psllq $32, %%mm3\n\t" | |
1045 "pand %4, %%mm2\n\t" | |
1046 "pand %5, %%mm3\n\t" | |
1047 "por %%mm2, %%mm0\n\t" | |
1048 "psrlq $16, %%mm1\n\t" | |
1049 "psrlq $32, %%mm4\n\t" | |
1050 "psllq $16, %%mm5\n\t" | |
1051 "por %%mm3, %%mm1\n\t" | |
1052 "pand %6, %%mm5\n\t" | |
1053 "por %%mm5, %%mm4\n\t" | |
1054 | |
1055 MOVNTQ" %%mm0, %0\n\t" | |
1056 MOVNTQ" %%mm1, 8%0\n\t" | |
1057 MOVNTQ" %%mm4, 16%0" | |
1058 | |
1059 :"=m"(*d) | |
1060 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1061 :"memory"); | |
1062 d += 24; | |
1063 s += 8; | |
1064 } | |
1065 __asm __volatile(SFENCE:::"memory"); | |
1066 __asm __volatile(EMMS:::"memory"); | |
1067 #endif | |
1068 while(s < end) | |
1069 { | |
1070 register uint16_t bgr; | |
1071 bgr = *s++; | |
1072 *d++ = (bgr&0x1F)<<3; | |
1073 *d++ = (bgr&0x3E0)>>2; | |
1074 *d++ = (bgr&0x7C00)>>7; | |
1075 } | |
1076 } | |
1077 | |
1078 static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size) | |
1079 { | |
1080 const uint16_t *end; | |
1081 #ifdef HAVE_MMX | |
1082 const uint16_t *mm_end; | |
1083 #endif | |
1084 uint8_t *d = (uint8_t *)dst; | |
1085 const uint16_t *s = (const uint16_t *)src; | |
1086 end = s + src_size/2; | |
1087 #ifdef HAVE_MMX | |
1088 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1089 mm_end = end - 7; | |
1090 while(s < mm_end) | |
1091 { | |
1092 __asm __volatile( | |
1093 PREFETCH" 32%1\n\t" | |
1094 "movq %1, %%mm0\n\t" | |
1095 "movq %1, %%mm1\n\t" | |
1096 "movq %1, %%mm2\n\t" | |
1097 "pand %2, %%mm0\n\t" | |
1098 "pand %3, %%mm1\n\t" | |
1099 "pand %4, %%mm2\n\t" | |
1100 "psllq $3, %%mm0\n\t" | |
1101 "psrlq $3, %%mm1\n\t" | |
1102 "psrlq $8, %%mm2\n\t" | |
1103 "movq %%mm0, %%mm3\n\t" | |
1104 "movq %%mm1, %%mm4\n\t" | |
1105 "movq %%mm2, %%mm5\n\t" | |
1106 "punpcklwd %5, %%mm0\n\t" | |
1107 "punpcklwd %5, %%mm1\n\t" | |
1108 "punpcklwd %5, %%mm2\n\t" | |
1109 "punpckhwd %5, %%mm3\n\t" | |
1110 "punpckhwd %5, %%mm4\n\t" | |
1111 "punpckhwd %5, %%mm5\n\t" | |
1112 "psllq $8, %%mm1\n\t" | |
1113 "psllq $16, %%mm2\n\t" | |
1114 "por %%mm1, %%mm0\n\t" | |
1115 "por %%mm2, %%mm0\n\t" | |
1116 "psllq $8, %%mm4\n\t" | |
1117 "psllq $16, %%mm5\n\t" | |
1118 "por %%mm4, %%mm3\n\t" | |
1119 "por %%mm5, %%mm3\n\t" | |
1120 | |
1121 "movq %%mm0, %%mm6\n\t" | |
1122 "movq %%mm3, %%mm7\n\t" | |
1123 | |
1124 "movq 8%1, %%mm0\n\t" | |
1125 "movq 8%1, %%mm1\n\t" | |
1126 "movq 8%1, %%mm2\n\t" | |
1127 "pand %2, %%mm0\n\t" | |
1128 "pand %3, %%mm1\n\t" | |
1129 "pand %4, %%mm2\n\t" | |
1130 "psllq $3, %%mm0\n\t" | |
1131 "psrlq $3, %%mm1\n\t" | |
1132 "psrlq $8, %%mm2\n\t" | |
1133 "movq %%mm0, %%mm3\n\t" | |
1134 "movq %%mm1, %%mm4\n\t" | |
1135 "movq %%mm2, %%mm5\n\t" | |
1136 "punpcklwd %5, %%mm0\n\t" | |
1137 "punpcklwd %5, %%mm1\n\t" | |
1138 "punpcklwd %5, %%mm2\n\t" | |
1139 "punpckhwd %5, %%mm3\n\t" | |
1140 "punpckhwd %5, %%mm4\n\t" | |
1141 "punpckhwd %5, %%mm5\n\t" | |
1142 "psllq $8, %%mm1\n\t" | |
1143 "psllq $16, %%mm2\n\t" | |
1144 "por %%mm1, %%mm0\n\t" | |
1145 "por %%mm2, %%mm0\n\t" | |
1146 "psllq $8, %%mm4\n\t" | |
1147 "psllq $16, %%mm5\n\t" | |
1148 "por %%mm4, %%mm3\n\t" | |
1149 "por %%mm5, %%mm3\n\t" | |
1150 :"=m"(*d) | |
1151 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) | |
1152 :"memory"); | |
1153 /* Borrowed 32 to 24 */ | |
1154 __asm __volatile( | |
1155 "movq %%mm0, %%mm4\n\t" | |
1156 "movq %%mm3, %%mm5\n\t" | |
1157 "movq %%mm6, %%mm0\n\t" | |
1158 "movq %%mm7, %%mm1\n\t" | |
1159 | |
1160 "movq %%mm4, %%mm6\n\t" | |
1161 "movq %%mm5, %%mm7\n\t" | |
1162 "movq %%mm0, %%mm2\n\t" | |
1163 "movq %%mm1, %%mm3\n\t" | |
1164 | |
1165 "psrlq $8, %%mm2\n\t" | |
1166 "psrlq $8, %%mm3\n\t" | |
1167 "psrlq $8, %%mm6\n\t" | |
1168 "psrlq $8, %%mm7\n\t" | |
1169 "pand %2, %%mm0\n\t" | |
1170 "pand %2, %%mm1\n\t" | |
1171 "pand %2, %%mm4\n\t" | |
1172 "pand %2, %%mm5\n\t" | |
1173 "pand %3, %%mm2\n\t" | |
1174 "pand %3, %%mm3\n\t" | |
1175 "pand %3, %%mm6\n\t" | |
1176 "pand %3, %%mm7\n\t" | |
1177 "por %%mm2, %%mm0\n\t" | |
1178 "por %%mm3, %%mm1\n\t" | |
1179 "por %%mm6, %%mm4\n\t" | |
1180 "por %%mm7, %%mm5\n\t" | |
1181 | |
1182 "movq %%mm1, %%mm2\n\t" | |
1183 "movq %%mm4, %%mm3\n\t" | |
1184 "psllq $48, %%mm2\n\t" | |
1185 "psllq $32, %%mm3\n\t" | |
1186 "pand %4, %%mm2\n\t" | |
1187 "pand %5, %%mm3\n\t" | |
1188 "por %%mm2, %%mm0\n\t" | |
1189 "psrlq $16, %%mm1\n\t" | |
1190 "psrlq $32, %%mm4\n\t" | |
1191 "psllq $16, %%mm5\n\t" | |
1192 "por %%mm3, %%mm1\n\t" | |
1193 "pand %6, %%mm5\n\t" | |
1194 "por %%mm5, %%mm4\n\t" | |
1195 | |
1196 MOVNTQ" %%mm0, %0\n\t" | |
1197 MOVNTQ" %%mm1, 8%0\n\t" | |
1198 MOVNTQ" %%mm4, 16%0" | |
1199 | |
1200 :"=m"(*d) | |
1201 :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) | |
1202 :"memory"); | |
1203 d += 24; | |
1204 s += 8; | |
1205 } | |
1206 __asm __volatile(SFENCE:::"memory"); | |
1207 __asm __volatile(EMMS:::"memory"); | |
1208 #endif | |
1209 while(s < end) | |
1210 { | |
1211 register uint16_t bgr; | |
1212 bgr = *s++; | |
1213 *d++ = (bgr&0x1F)<<3; | |
1214 *d++ = (bgr&0x7E0)>>3; | |
1215 *d++ = (bgr&0xF800)>>8; | |
1216 } | |
1217 } | |
1218 | |
1219 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1220 { | |
1221 const uint16_t *end; | |
1222 #ifdef HAVE_MMX | |
1223 const uint16_t *mm_end; | |
1224 #endif | |
1225 uint8_t *d = (uint8_t *)dst; | |
1226 const uint16_t *s = (const uint16_t *)src; | |
1227 end = s + src_size/2; | |
1228 #ifdef HAVE_MMX | |
1229 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1230 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
1231 mm_end = end - 3; | |
1232 while(s < mm_end) | |
1233 { | |
1234 __asm __volatile( | |
1235 PREFETCH" 32%1\n\t" | |
1236 "movq %1, %%mm0\n\t" | |
1237 "movq %1, %%mm1\n\t" | |
1238 "movq %1, %%mm2\n\t" | |
1239 "pand %2, %%mm0\n\t" | |
1240 "pand %3, %%mm1\n\t" | |
1241 "pand %4, %%mm2\n\t" | |
1242 "psllq $3, %%mm0\n\t" | |
1243 "psrlq $2, %%mm1\n\t" | |
1244 "psrlq $7, %%mm2\n\t" | |
1245 "movq %%mm0, %%mm3\n\t" | |
1246 "movq %%mm1, %%mm4\n\t" | |
1247 "movq %%mm2, %%mm5\n\t" | |
1248 "punpcklwd %%mm7, %%mm0\n\t" | |
1249 "punpcklwd %%mm7, %%mm1\n\t" | |
1250 "punpcklwd %%mm7, %%mm2\n\t" | |
1251 "punpckhwd %%mm7, %%mm3\n\t" | |
1252 "punpckhwd %%mm7, %%mm4\n\t" | |
1253 "punpckhwd %%mm7, %%mm5\n\t" | |
1254 "psllq $8, %%mm1\n\t" | |
1255 "psllq $16, %%mm2\n\t" | |
1256 "por %%mm1, %%mm0\n\t" | |
1257 "por %%mm2, %%mm0\n\t" | |
1258 "psllq $8, %%mm4\n\t" | |
1259 "psllq $16, %%mm5\n\t" | |
1260 "por %%mm4, %%mm3\n\t" | |
1261 "por %%mm5, %%mm3\n\t" | |
1262 MOVNTQ" %%mm0, %0\n\t" | |
1263 MOVNTQ" %%mm3, 8%0\n\t" | |
1264 :"=m"(*d) | |
1265 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) | |
1266 :"memory"); | |
1267 d += 16; | |
1268 s += 4; | |
1269 } | |
1270 __asm __volatile(SFENCE:::"memory"); | |
1271 __asm __volatile(EMMS:::"memory"); | |
1272 #endif | |
1273 while(s < end) | |
1274 { | |
1275 #if 0 //slightly slower on Athlon | |
1276 int bgr= *s++; | |
1277 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9); | |
1278 #else | |
1279 register uint16_t bgr; | |
1280 bgr = *s++; | |
1281 #ifdef WORDS_BIGENDIAN | |
1282 *d++ = 0; | |
1283 *d++ = (bgr&0x7C00)>>7; | |
1284 *d++ = (bgr&0x3E0)>>2; | |
1285 *d++ = (bgr&0x1F)<<3; | |
1286 #else | |
1287 *d++ = (bgr&0x1F)<<3; | |
1288 *d++ = (bgr&0x3E0)>>2; | |
1289 *d++ = (bgr&0x7C00)>>7; | |
1290 *d++ = 0; | |
1291 #endif | |
1292 | |
1293 #endif | |
1294 } | |
1295 } | |
1296 | |
1297 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1298 { | |
1299 const uint16_t *end; | |
1300 #ifdef HAVE_MMX | |
1301 const uint16_t *mm_end; | |
1302 #endif | |
1303 uint8_t *d = (uint8_t *)dst; | |
1304 const uint16_t *s = (uint16_t *)src; | |
1305 end = s + src_size/2; | |
1306 #ifdef HAVE_MMX | |
1307 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); | |
1308 __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); | |
1309 mm_end = end - 3; | |
1310 while(s < mm_end) | |
1311 { | |
1312 __asm __volatile( | |
1313 PREFETCH" 32%1\n\t" | |
1314 "movq %1, %%mm0\n\t" | |
1315 "movq %1, %%mm1\n\t" | |
1316 "movq %1, %%mm2\n\t" | |
1317 "pand %2, %%mm0\n\t" | |
1318 "pand %3, %%mm1\n\t" | |
1319 "pand %4, %%mm2\n\t" | |
1320 "psllq $3, %%mm0\n\t" | |
1321 "psrlq $3, %%mm1\n\t" | |
1322 "psrlq $8, %%mm2\n\t" | |
1323 "movq %%mm0, %%mm3\n\t" | |
1324 "movq %%mm1, %%mm4\n\t" | |
1325 "movq %%mm2, %%mm5\n\t" | |
1326 "punpcklwd %%mm7, %%mm0\n\t" | |
1327 "punpcklwd %%mm7, %%mm1\n\t" | |
1328 "punpcklwd %%mm7, %%mm2\n\t" | |
1329 "punpckhwd %%mm7, %%mm3\n\t" | |
1330 "punpckhwd %%mm7, %%mm4\n\t" | |
1331 "punpckhwd %%mm7, %%mm5\n\t" | |
1332 "psllq $8, %%mm1\n\t" | |
1333 "psllq $16, %%mm2\n\t" | |
1334 "por %%mm1, %%mm0\n\t" | |
1335 "por %%mm2, %%mm0\n\t" | |
1336 "psllq $8, %%mm4\n\t" | |
1337 "psllq $16, %%mm5\n\t" | |
1338 "por %%mm4, %%mm3\n\t" | |
1339 "por %%mm5, %%mm3\n\t" | |
1340 MOVNTQ" %%mm0, %0\n\t" | |
1341 MOVNTQ" %%mm3, 8%0\n\t" | |
1342 :"=m"(*d) | |
1343 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) | |
1344 :"memory"); | |
1345 d += 16; | |
1346 s += 4; | |
1347 } | |
1348 __asm __volatile(SFENCE:::"memory"); | |
1349 __asm __volatile(EMMS:::"memory"); | |
1350 #endif | |
1351 while(s < end) | |
1352 { | |
1353 register uint16_t bgr; | |
1354 bgr = *s++; | |
1355 #ifdef WORDS_BIGENDIAN | |
1356 *d++ = 0; | |
1357 *d++ = (bgr&0xF800)>>8; | |
1358 *d++ = (bgr&0x7E0)>>3; | |
1359 *d++ = (bgr&0x1F)<<3; | |
1360 #else | |
1361 *d++ = (bgr&0x1F)<<3; | |
1362 *d++ = (bgr&0x7E0)>>3; | |
1363 *d++ = (bgr&0xF800)>>8; | |
1364 *d++ = 0; | |
1365 #endif | |
1366 } | |
1367 } | |
1368 | |
1369 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | |
1370 { | |
1371 long idx = 15 - src_size; |
1372 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx; |
18861 | 1373 #ifdef HAVE_MMX |
22991 | 1374 __asm __volatile( |
1375 " test %0, %0 \n" |
1376 " jns 2f \n" |
1377 " "PREFETCH" (%1, %0) \n" |
22991 | 1378 " movq %3, %%mm7 \n" |
1379 " pxor %4, %%mm7 \n" | |
1380 " movq %%mm7, %%mm6 \n" | |
1381 " pxor %5, %%mm7 \n" | |
1382 ASMALIGN(4) | |
1383 "1: \n" | |
1384 " "PREFETCH" 32(%1, %0) \n" |
1385 " movq (%1, %0), %%mm0 \n" |
1386 " movq 8(%1, %0), %%mm1 \n" |
22991 | 1387 # ifdef HAVE_MMX2 |
1388 " pshufw $177, %%mm0, %%mm3 \n" | |
1389 " pshufw $177, %%mm1, %%mm5 \n" | |
1390 " pand %%mm7, %%mm0 \n" | |
1391 " pand %%mm6, %%mm3 \n" | |
1392 " pand %%mm7, %%mm1 \n" | |
1393 " pand %%mm6, %%mm5 \n" | |
1394 " por %%mm3, %%mm0 \n" | |
1395 " por %%mm5, %%mm1 \n" | |
1396 # else | |
1397 " movq %%mm0, %%mm2 \n" | |
1398 " movq %%mm1, %%mm4 \n" | |
1399 " pand %%mm7, %%mm0 \n" | |
1400 " pand %%mm6, %%mm2 \n" | |
1401 " pand %%mm7, %%mm1 \n" | |
1402 " pand %%mm6, %%mm4 \n" | |
1403 " movq %%mm2, %%mm3 \n" | |
1404 " movq %%mm4, %%mm5 \n" | |
1405 " pslld $16, %%mm2 \n" | |
1406 " psrld $16, %%mm3 \n" | |
1407 " pslld $16, %%mm4 \n" | |
1408 " psrld $16, %%mm5 \n" | |
1409 " por %%mm2, %%mm0 \n" | |
1410 " por %%mm4, %%mm1 \n" | |
1411 " por %%mm3, %%mm0 \n" | |
1412 " por %%mm5, %%mm1 \n" | |
1413 # endif | |
1414 " "MOVNTQ" %%mm0, (%2, %0) \n" |
1415 " "MOVNTQ" %%mm1, 8(%2, %0) \n" |
22991 | 1416 " add $16, %0 \n" |
1417 " js 1b \n" |
22991 | 1418 " "SFENCE" \n" |
1419 " "EMMS" \n" | |
1420 "2: \n" |
1421 : "+&r"(idx) |
1422 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) |
22991 | 1423 : "memory"); |
18861 | 1424 #endif |
1425 for (; idx<15; idx+=4) { |
1426 register int v = *(uint32_t *)&s[idx], g = v & 0xff00ff00; |
22991 | 1427 v &= 0xff00ff; |
1428 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); |
18861 | 1429 } |
1430 } | |
1431 | |
1432 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) | |
1433 { | |
1434 unsigned i; | |
1435 #ifdef HAVE_MMX | |
1436 long mmx_size= 23 - src_size; | |
1437 asm volatile ( | |
1438 "test %%"REG_a", %%"REG_a" \n\t" |
1439 "jns 2f \n\t" |
18861 | 1440 "movq "MANGLE(mask24r)", %%mm5 \n\t" |
1441 "movq "MANGLE(mask24g)", %%mm6 \n\t" | |
1442 "movq "MANGLE(mask24b)", %%mm7 \n\t" | |
1443 ASMALIGN(4) |
18861 | 1444 "1: \n\t" |
1445 PREFETCH" 32(%1, %%"REG_a") \n\t" | |
1446 "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |
1447 "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG | |
1448 "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B | |
1449 "psllq $16, %%mm0 \n\t" // 00 BGR BGR | |
1450 "pand %%mm5, %%mm0 \n\t" | |
1451 "pand %%mm6, %%mm1 \n\t" | |
1452 "pand %%mm7, %%mm2 \n\t" | |
1453 "por %%mm0, %%mm1 \n\t" | |
1454 "por %%mm2, %%mm1 \n\t" | |
1455 "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG | |
1456 MOVNTQ" %%mm1, (%2, %%"REG_a")\n\t" // RGB RGB RG | |
1457 "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B | |
1458 "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR | |
1459 "pand %%mm7, %%mm0 \n\t" | |
1460 "pand %%mm5, %%mm1 \n\t" | |
1461 "pand %%mm6, %%mm2 \n\t" | |
1462 "por %%mm0, %%mm1 \n\t" | |
1463 "por %%mm2, %%mm1 \n\t" | |
1464 "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B | |
1465 MOVNTQ" %%mm1, 8(%2, %%"REG_a")\n\t" // B RGB RGB R | |
1466 "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR | |
1467 "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG | |
1468 "pand %%mm6, %%mm0 \n\t" | |
1469 "pand %%mm7, %%mm1 \n\t" | |
1470 "pand %%mm5, %%mm2 \n\t" | |
1471 "por %%mm0, %%mm1 \n\t" | |
1472 "por %%mm2, %%mm1 \n\t" | |
1473 MOVNTQ" %%mm1, 16(%2, %%"REG_a")\n\t" | |
1474 "add $24, %%"REG_a" \n\t" | |
1475 " js 1b \n\t" | |
1476 "2: \n\t" |
18861 | 1477 : "+a" (mmx_size) |
1478 : "r" (src-mmx_size), "r"(dst-mmx_size) | |
1479 ); | |
1480 | |
1481 __asm __volatile(SFENCE:::"memory"); | |
1482 __asm __volatile(EMMS:::"memory"); | |
1483 | |
1484 if(mmx_size==23) return; //finished, was a multiple of 8 | |
1485 | |
1486 src+= src_size; | |
1487 dst+= src_size; | |
1488 src_size= 23-mmx_size; | |
1489 src-= src_size; | |
1490 dst-= src_size; | |
1491 #endif | |
1492 for(i=0; i<src_size; i+=3) | |
1493 { | |
1494 register uint8_t x; | |
1495 x = src[i + 2]; | |
1496 dst[i + 1] = src[i + 1]; | |
1497 dst[i + 2] = src[i + 0]; | |
1498 dst[i + 0] = x; | |
1499 } | |
1500 } | |
1501 | |
1502 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1503 long width, long height, | |
1504 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | |
1505 { | |
1506 long y; | |
1507 const long chromWidth= width>>1; | |
1508 for(y=0; y<height; y++) | |
1509 { | |
1510 #ifdef HAVE_MMX | |
1511 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely limited by memory anyway) | |
1512 asm volatile( | |
1513 "xor %%"REG_a", %%"REG_a" \n\t" | |
1514 ASMALIGN(4) |
18861 | 1515 "1: \n\t" |
1516 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |
1517 PREFETCH" 32(%2, %%"REG_a") \n\t" | |
1518 PREFETCH" 32(%3, %%"REG_a") \n\t" | |
1519 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | |
1520 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1521 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | |
1522 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1523 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
1524 | |
1525 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) | |
1526 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | |
1527 "movq %%mm3, %%mm4 \n\t" // Y(0) | |
1528 "movq %%mm5, %%mm6 \n\t" // Y(8) | |
1529 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) | |
1530 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) | |
1531 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) | |
1532 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) | |
1533 | |
1534 MOVNTQ" %%mm3, (%0, %%"REG_a", 4)\n\t" | |
1535 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t" | |
1536 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4)\n\t" | |
1537 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t" | |
1538 | |
1539 "add $8, %%"REG_a" \n\t" | |
1540 "cmp %4, %%"REG_a" \n\t" | |
1541 " jb 1b \n\t" | |
1542 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | |
1543 : "%"REG_a | |
1544 ); | |
1545 #else | |
1546 | |
1547 #if defined ARCH_ALPHA && defined HAVE_MVI | |
1548 #define pl2yuy2(n) \ | |
1549 y1 = yc[n]; \ | |
1550 y2 = yc2[n]; \ | |
1551 u = uc[n]; \ | |
1552 v = vc[n]; \ | |
1553 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ | |
1554 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ | |
1555 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ | |
1556 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ | |
1557 yuv1 = (u << 8) + (v << 24); \ | |
1558 yuv2 = yuv1 + y2; \ | |
1559 yuv1 += y1; \ | |
1560 qdst[n] = yuv1; \ | |
1561 qdst2[n] = yuv2; | |
1562 | |
1563 int i; | |
1564 uint64_t *qdst = (uint64_t *) dst; | |
1565 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); | |
1566 const uint32_t *yc = (uint32_t *) ysrc; | |
1567 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); | |
1568 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; | |
1569 for(i = 0; i < chromWidth; i += 8){ | |
1570 uint64_t y1, y2, yuv1, yuv2; | |
1571 uint64_t u, v; | |
1572 /* Prefetch */ | |
1573 asm("ldq $31,64(%0)" :: "r"(yc)); | |
1574 asm("ldq $31,64(%0)" :: "r"(yc2)); | |
1575 asm("ldq $31,64(%0)" :: "r"(uc)); | |
1576 asm("ldq $31,64(%0)" :: "r"(vc)); | |
1577 | |
1578 pl2yuy2(0); | |
1579 pl2yuy2(1); | |
1580 pl2yuy2(2); | |
1581 pl2yuy2(3); | |
1582 | |
1583 yc += 4; | |
1584 yc2 += 4; | |
1585 uc += 4; | |
1586 vc += 4; | |
1587 qdst += 4; | |
1588 qdst2 += 4; | |
1589 } | |
1590 y++; | |
1591 ysrc += lumStride; | |
1592 dst += dstStride; | |
1593 | |
1594 #elif __WORDSIZE >= 64 | |
1595 int i; | |
1596 uint64_t *ldst = (uint64_t *) dst; | |
1597 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1598 for(i = 0; i < chromWidth; i += 2){ | |
1599 uint64_t k, l; | |
1600 k = yc[0] + (uc[0] << 8) + | |
1601 (yc[1] << 16) + (vc[0] << 24); | |
1602 l = yc[2] + (uc[1] << 8) + | |
1603 (yc[3] << 16) + (vc[1] << 24); | |
1604 *ldst++ = k + (l << 32); | |
1605 yc += 4; | |
1606 uc += 2; | |
1607 vc += 2; | |
1608 } | |
1609 | |
1610 #else | |
1611 int i, *idst = (int32_t *) dst; | |
1612 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1613 for(i = 0; i < chromWidth; i++){ | |
1614 #ifdef WORDS_BIGENDIAN | |
1615 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + | |
1616 (yc[1] << 8) + (vc[0] << 0); | |
1617 #else | |
1618 *idst++ = yc[0] + (uc[0] << 8) + | |
1619 (yc[1] << 16) + (vc[0] << 24); | |
1620 #endif | |
1621 yc += 2; | |
1622 uc++; | |
1623 vc++; | |
1624 } | |
1625 #endif | |
1626 #endif | |
1627 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) | |
1628 { | |
1629 usrc += chromStride; | |
1630 vsrc += chromStride; | |
1631 } | |
1632 ysrc += lumStride; | |
1633 dst += dstStride; | |
1634 } | |
1635 #ifdef HAVE_MMX | |
1636 asm( EMMS" \n\t" | |
1637 SFENCE" \n\t" | |
1638 :::"memory"); | |
1639 #endif | |
1640 } | |
1641 | |
1642 /** | |
1643 * | |
1644 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1645 * problem for anyone then tell me, and I'll fix it) | |
1646 */ | |
1647 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1648 long width, long height, | |
1649 long lumStride, long chromStride, long dstStride) | |
1650 { | |
1651 //FIXME interpolate chroma | |
1652 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); | |
1653 } | |
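For reference, a hedged scalar sketch of the packing the converters above perform for a single row: every two luma samples share one U and one V sample, emitted in Y0 U Y1 V order. The name pack_row_yuy2 is illustrative only and does not exist in this file.

#include <stdint.h>

/* Illustrative reference only: pack one row of planar Y/U/V into YUY2.
 * width is the number of luma samples; u and v hold width/2 samples each. */
static void pack_row_yuy2(uint8_t *dst, const uint8_t *y,
                          const uint8_t *u, const uint8_t *v, long width)
{
    long i;
    for (i = 0; i < width / 2; i++) {
        dst[4*i + 0] = y[2*i + 0];
        dst[4*i + 1] = u[i];
        dst[4*i + 2] = y[2*i + 1];
        dst[4*i + 3] = v[i];
    }
}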
1654 | |
1655 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1656 long width, long height, | |
1657 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | |
1658 { | |
1659 long y; | |
1660 const long chromWidth= width>>1; | |
1661 for(y=0; y<height; y++) | |
1662 { | |
1663 #ifdef HAVE_MMX | |
1664 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely limited by memory anyway) | |
1665 asm volatile( | |
1666 "xor %%"REG_a", %%"REG_a" \n\t" | |
19372 (diego: Replace asmalign.h hack by ASMALIGN cpp macros from config.h.) | 1667 ASMALIGN(4) |
18861 | 1668 "1: \n\t" |
1669 PREFETCH" 32(%1, %%"REG_a", 2) \n\t" | |
1670 PREFETCH" 32(%2, %%"REG_a") \n\t" | |
1671 PREFETCH" 32(%3, %%"REG_a") \n\t" | |
1672 "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) | |
1673 "movq %%mm0, %%mm2 \n\t" // U(0) | |
1674 "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) | |
1675 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1676 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) | |
1677 | |
1678 "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) | |
1679 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) | |
1680 "movq %%mm0, %%mm4 \n\t" // Y(0) | |
1681 "movq %%mm2, %%mm6 \n\t" // Y(8) | |
1682 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) | |
1683 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4) | |
1684 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) | |
1685 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) | |
1686 | |
1687 MOVNTQ" %%mm0, (%0, %%"REG_a", 4)\n\t" | |
1688 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t" | |
1689 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4)\n\t" | |
1690 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t" | |
1691 | |
1692 "add $8, %%"REG_a" \n\t" | |
1693 "cmp %4, %%"REG_a" \n\t" | |
1694 " jb 1b \n\t" | |
1695 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) | |
1696 : "%"REG_a | |
1697 ); | |
1698 #else | |
1699 //FIXME adapt the alpha asm code from yv12->yuy2 | |
1700 | |
1701 #if __WORDSIZE >= 64 | |
1702 int i; | |
1703 uint64_t *ldst = (uint64_t *) dst; | |
1704 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1705 for(i = 0; i < chromWidth; i += 2){ | |
1706 uint64_t k, l; | |
1707 k = uc[0] + (yc[0] << 8) + | |
1708 (vc[0] << 16) + (yc[1] << 24); | |
1709 l = uc[1] + (yc[2] << 8) + | |
1710 (vc[1] << 16) + (yc[3] << 24); | |
1711 *ldst++ = k + (l << 32); | |
1712 yc += 4; | |
1713 uc += 2; | |
1714 vc += 2; | |
1715 } | |
1716 | |
1717 #else | |
1718 int i, *idst = (int32_t *) dst; | |
1719 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | |
1720 for(i = 0; i < chromWidth; i++){ | |
1721 #ifdef WORDS_BIGENDIAN | |
1722 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + | |
1723 (vc[0] << 8) + (yc[1] << 0); | |
1724 #else | |
1725 *idst++ = uc[0] + (yc[0] << 8) + | |
1726 (vc[0] << 16) + (yc[1] << 24); | |
1727 #endif | |
1728 yc += 2; | |
1729 uc++; | |
1730 vc++; | |
1731 } | |
1732 #endif | |
1733 #endif | |
1734 if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) | |
1735 { | |
1736 usrc += chromStride; | |
1737 vsrc += chromStride; | |
1738 } | |
1739 ysrc += lumStride; | |
1740 dst += dstStride; | |
1741 } | |
1742 #ifdef HAVE_MMX | |
1743 asm( EMMS" \n\t" | |
1744 SFENCE" \n\t" | |
1745 :::"memory"); | |
1746 #endif | |
1747 } | |
1748 | |
1749 /** | |
1750 * | |
1751 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1752 * problem for anyone then tell me, and I'll fix it) | |
1753 */ | |
1754 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1755 long width, long height, | |
1756 long lumStride, long chromStride, long dstStride) | |
1757 { | |
1758 //FIXME interpolate chroma | |
1759 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); | |
1760 } | |
1761 | |
1762 /** | |
1763 * | |
1764 * width should be a multiple of 16 | |
1765 */ | |
1766 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, | |
1767 long width, long height, | |
1768 long lumStride, long chromStride, long dstStride) | |
1769 { | |
1770 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); | |
1771 } | |
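The only difference between the YV12 and YUV422P wrappers is the vertLumPerChroma argument (2 vs. 1), which decides after how many luma rows the chroma pointers advance via the (y & (vertLumPerChroma-1)) test above; the test assumes vertLumPerChroma is a power of two. A small, illustrative standalone check (not part of this file):

#include <stdio.h>

/* Illustrative only: the chroma rows advance after every luma row for
 * 4:2:2 (vertLumPerChroma == 1) and after every second luma row for
 * 4:2:0 (vertLumPerChroma == 2). */
static int chroma_advances(long y, long vertLumPerChroma)
{
    return (y & (vertLumPerChroma - 1)) == (vertLumPerChroma - 1);
}

int main(void)
{
    long y;
    for (y = 0; y < 4; y++)
        printf("luma row %ld: 4:2:0=%d 4:2:2=%d\n",
               y, chroma_advances(y, 2), chroma_advances(y, 1));
    return 0;
}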
1772 | |
1773 /** | |
1774 * | |
1775 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
1776 * problem for anyone then tell me, and I'll fix it) | |
1777 */ | |
1778 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
1779 long width, long height, | |
1780 long lumStride, long chromStride, long srcStride) | |
1781 { | |
1782 long y; | |
1783 const long chromWidth= width>>1; | |
1784 for(y=0; y<height; y+=2) | |
1785 { | |
1786 #ifdef HAVE_MMX | |
1787 asm volatile( | |
1788 "xor %%"REG_a", %%"REG_a" \n\t" | |
1789 "pcmpeqw %%mm7, %%mm7 \n\t" | |
1790 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
19372 | 1791 ASMALIGN(4) |
18861 | 1792 "1: \n\t" |
1793 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
1794 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1795 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) | |
1796 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) | |
1797 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) | |
1798 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) | |
1799 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4) | |
1800 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
1801 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
1802 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
1803 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
1804 | |
1805 MOVNTQ" %%mm2, (%1, %%"REG_a", 2)\n\t" | |
1806 | |
1807 "movq 16(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(8) | |
1808 "movq 24(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(12) | |
1809 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) | |
1810 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) | |
1811 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) | |
1812 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12) | |
1813 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
1814 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
1815 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
1816 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
1817 | |
1818 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2)\n\t" | |
1819 | |
1820 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) | |
1821 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
1822 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
1823 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
1824 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
1825 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
1826 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
1827 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
1828 | |
1829 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" | |
1830 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" | |
1831 | |
1832 "add $8, %%"REG_a" \n\t" | |
1833 "cmp %4, %%"REG_a" \n\t" | |
1834 " jb 1b \n\t" | |
1835 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
1836 : "memory", "%"REG_a | |
1837 ); | |
1838 | |
1839 ydst += lumStride; | |
1840 src += srcStride; | |
1841 | |
1842 asm volatile( | |
1843 "xor %%"REG_a", %%"REG_a" \n\t" | |
19372 | 1844 ASMALIGN(4) |
18861 | 1845 "1: \n\t" |
1846 PREFETCH" 64(%0, %%"REG_a", 4) \n\t" | |
1847 "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) | |
1848 "movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4) | |
1849 "movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8) | |
1850 "movq 24(%0, %%"REG_a", 4), %%mm3\n\t" // YUYV YUYV(12) | |
1851 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
1852 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
1853 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
1854 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
1855 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
1856 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
1857 | |
1858 MOVNTQ" %%mm0, (%1, %%"REG_a", 2)\n\t" | |
1859 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2)\n\t" | |
1860 | |
1861 "add $8, %%"REG_a" \n\t" | |
1862 "cmp %4, %%"REG_a" \n\t" | |
1863 " jb 1b \n\t" | |
1864 | |
1865 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
1866 : "memory", "%"REG_a | |
1867 ); | |
1868 #else | |
1869 long i; | |
1870 for(i=0; i<chromWidth; i++) | |
1871 { | |
1872 ydst[2*i+0] = src[4*i+0]; | |
1873 udst[i] = src[4*i+1]; | |
1874 ydst[2*i+1] = src[4*i+2]; | |
1875 vdst[i] = src[4*i+3]; | |
1876 } | |
1877 ydst += lumStride; | |
1878 src += srcStride; | |
1879 | |
1880 for(i=0; i<chromWidth; i++) | |
1881 { | |
1882 ydst[2*i+0] = src[4*i+0]; | |
1883 ydst[2*i+1] = src[4*i+2]; | |
1884 } | |
1885 #endif | |
1886 udst += chromStride; | |
1887 vdst += chromStride; | |
1888 ydst += lumStride; | |
1889 src += srcStride; | |
1890 } | |
1891 #ifdef HAVE_MMX | |
1892 asm volatile( EMMS" \n\t" | |
1893 SFENCE" \n\t" | |
1894 :::"memory"); | |
1895 #endif | |
1896 } | |
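A hedged usage sketch for the converter above: with tightly packed buffers the luma stride equals width, the chroma stride is width/2, and the packed source stride is width*2. yuy2toyv12_fn and convert_frame are illustrative names; the real entry point is the yuy2toyv12 function pointer set up in rgb2rgb_init below.

#include <stdint.h>

/* Illustrative only: stride arithmetic for a tightly packed frame when
 * calling a converter with the signature of yuy2toyv12 above. */
typedef void (*yuy2toyv12_fn)(const uint8_t *src, uint8_t *ydst,
                              uint8_t *udst, uint8_t *vdst,
                              long width, long height,
                              long lumStride, long chromStride,
                              long srcStride);

static void convert_frame(yuy2toyv12_fn convert, const uint8_t *yuy2,
                          uint8_t *y, uint8_t *u, uint8_t *v,
                          long width, long height)
{
    convert(yuy2, y, u, v, width, height,
            width,      /* lumStride: one byte per luma sample      */
            width / 2,  /* chromStride: chroma planes are half wide */
            width * 2); /* srcStride: YUY2 is 2 bytes per pixel     */
}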
1897 | |
1898 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, | |
1899 uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
1900 long width, long height, long lumStride, long chromStride) | |
1901 { | |
1902 /* Y Plane */ | |
1903 memcpy(ydst, ysrc, width*height); | |
1904 | |
1905 /* XXX: implement upscaling for U,V */ | |
1906 } | |
1907 | |
1908 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) | |
1909 { | |
1910 long x,y; | |
1911 | |
1912 dst[0]= src[0]; | |
1913 | |
1914 // first line | |
1915 for(x=0; x<srcWidth-1; x++){ | |
1916 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | |
1917 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | |
1918 } | |
1919 dst[2*srcWidth-1]= src[srcWidth-1]; | |
1920 | |
1921 dst+= dstStride; | |
1922 | |
1923 for(y=1; y<srcHeight; y++){ | |
1924 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1925 const long mmxSize= srcWidth&~15; | |
1926 asm volatile( | |
1927 "mov %4, %%"REG_a" \n\t" | |
1928 "1: \n\t" | |
1929 "movq (%0, %%"REG_a"), %%mm0 \n\t" | |
1930 "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
1931 "movq 1(%0, %%"REG_a"), %%mm2 \n\t" | |
1932 "movq 1(%1, %%"REG_a"), %%mm3 \n\t" | |
1933 "movq -1(%0, %%"REG_a"), %%mm4 \n\t" | |
1934 "movq -1(%1, %%"REG_a"), %%mm5 \n\t" | |
1935 PAVGB" %%mm0, %%mm5 \n\t" | |
1936 PAVGB" %%mm0, %%mm3 \n\t" | |
1937 PAVGB" %%mm0, %%mm5 \n\t" | |
1938 PAVGB" %%mm0, %%mm3 \n\t" | |
1939 PAVGB" %%mm1, %%mm4 \n\t" | |
1940 PAVGB" %%mm1, %%mm2 \n\t" | |
1941 PAVGB" %%mm1, %%mm4 \n\t" | |
1942 PAVGB" %%mm1, %%mm2 \n\t" | |
1943 "movq %%mm5, %%mm7 \n\t" | |
1944 "movq %%mm4, %%mm6 \n\t" | |
1945 "punpcklbw %%mm3, %%mm5 \n\t" | |
1946 "punpckhbw %%mm3, %%mm7 \n\t" | |
1947 "punpcklbw %%mm2, %%mm4 \n\t" | |
1948 "punpckhbw %%mm2, %%mm6 \n\t" | |
1949 #if 1 | |
1950 MOVNTQ" %%mm5, (%2, %%"REG_a", 2)\n\t" | |
1951 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2)\n\t" | |
1952 MOVNTQ" %%mm4, (%3, %%"REG_a", 2)\n\t" | |
1953 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2)\n\t" | |
1954 #else | |
1955 "movq %%mm5, (%2, %%"REG_a", 2) \n\t" | |
1956 "movq %%mm7, 8(%2, %%"REG_a", 2)\n\t" | |
1957 "movq %%mm4, (%3, %%"REG_a", 2) \n\t" | |
1958 "movq %%mm6, 8(%3, %%"REG_a", 2)\n\t" | |
1959 #endif | |
1960 "add $8, %%"REG_a" \n\t" | |
1961 " js 1b \n\t" | |
1962 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), | |
1963 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), | |
1964 "g" (-mmxSize) | |
1965 : "%"REG_a | |
1966 | |
1967 ); | |
1968 #else | |
1969 const long mmxSize=1; | |
1970 #endif | |
1971 dst[0 ]= (3*src[0] + src[srcStride])>>2; | |
1972 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | |
1973 | |
1974 for(x=mmxSize-1; x<srcWidth-1; x++){ | |
1975 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; | |
1976 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; | |
1977 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; | |
1978 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; | |
1979 } | |
1980 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2; | |
1981 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2; | |
1982 | |
1983 dst+=dstStride*2; | |
1984 src+=srcStride; | |
1985 } | |
1986 | |
1987 // last line | |
1988 #if 1 | |
1989 dst[0]= src[0]; | |
1990 | |
1991 for(x=0; x<srcWidth-1; x++){ | |
1992 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | |
1993 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | |
1994 } | |
1995 dst[2*srcWidth-1]= src[srcWidth-1]; | |
1996 #else | |
1997 for(x=0; x<srcWidth; x++){ | |
1998 dst[2*x+0]= | |
1999 dst[2*x+1]= src[x]; | |
2000 } | |
2001 #endif | |
2002 | |
2003 #ifdef HAVE_MMX | |
2004 asm volatile( EMMS" \n\t" | |
2005 SFENCE" \n\t" | |
2006 :::"memory"); | |
2007 #endif | |
2008 } | |
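The scalar tail of planar2x makes the filter explicit: each new sample is a 3:1 or 1:3 weighted average of its two nearest source samples, computed as (3*a + b) >> 2 with no rounding, and interior rows apply the same weights vertically as well. A hedged, standalone worked example of the horizontal pass (upscale2x_row is illustrative, not part of this file):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the horizontal half of the 2x upscale, as done on
 * the first and last lines above.  Edge samples are copied through. */
static void upscale2x_row(uint8_t *dst, const uint8_t *src, long srcWidth)
{
    long x;
    dst[0] = src[0];
    for (x = 0; x < srcWidth - 1; x++) {
        dst[2*x + 1] = (3*src[x] +   src[x+1]) >> 2;
        dst[2*x + 2] = (  src[x] + 3*src[x+1]) >> 2;
    }
    dst[2*srcWidth - 1] = src[srcWidth - 1];
}

int main(void)
{
    uint8_t src[4] = { 0, 64, 128, 255 };
    uint8_t dst[8];
    long i;
    upscale2x_row(dst, src, 4);
    for (i = 0; i < 8; i++)
        printf("%d ", dst[i]);   /* prints: 0 16 48 80 112 159 223 255 */
    printf("\n");
    return 0;
}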
2009 | |
2010 /** | |
2011 * | |
2012 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a | |
2013 * problem for anyone then tell me, and I'll fix it) | |
2014 * chrominance data is only taken from every second line; others are ignored. FIXME: write HQ version | |
2015 */ | |
2016 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
2017 long width, long height, | |
2018 long lumStride, long chromStride, long srcStride) | |
2019 { | |
2020 long y; | |
2021 const long chromWidth= width>>1; | |
2022 for(y=0; y<height; y+=2) | |
2023 { | |
2024 #ifdef HAVE_MMX | |
2025 asm volatile( | |
2026 "xorl %%eax, %%eax \n\t" | |
2027 "pcmpeqw %%mm7, %%mm7 \n\t" | |
2028 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | |
19372 | 2029 ASMALIGN(4) |
18861 | 2030 "1: \n\t" |
2031 PREFETCH" 64(%0, %%eax, 4) \n\t" | |
2032 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0) | |
2033 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4) | |
2034 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) | |
2035 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) | |
2036 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) | |
2037 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4) | |
2038 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0) | |
2039 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4) | |
2040 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) | |
2041 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) | |
2042 | |
2043 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t" | |
2044 | |
2045 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8) | |
2046 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12) | |
2047 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) | |
2048 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) | |
2049 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) | |
2050 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12) | |
2051 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8) | |
2052 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12) | |
2053 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) | |
2054 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) | |
2055 | |
2056 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t" | |
2057 | |
2058 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) | |
2059 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) | |
2060 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0) | |
2061 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8) | |
2062 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0) | |
2063 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8) | |
2064 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) | |
2065 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) | |
2066 | |
2067 MOVNTQ" %%mm0, (%3, %%eax) \n\t" | |
2068 MOVNTQ" %%mm2, (%2, %%eax) \n\t" | |
2069 | |
2070 "addl $8, %%eax \n\t" | |
2071 "cmpl %4, %%eax \n\t" | |
2072 " jb 1b \n\t" | |
2073 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
2074 : "memory", "%eax" | |
2075 ); | |
2076 | |
2077 ydst += lumStride; | |
2078 src += srcStride; | |
2079 | |
2080 asm volatile( | |
2081 "xorl %%eax, %%eax \n\t" | |
19372 | 2082 ASMALIGN(4) |
18861 | 2083 "1: \n\t" |
2084 PREFETCH" 64(%0, %%eax, 4) \n\t" | |
2085 "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0) | |
2086 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4) | |
2087 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8) | |
2088 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12) | |
2089 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) | |
2090 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) | |
2091 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) | |
2092 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12) | |
2093 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) | |
2094 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) | |
2095 | |
2096 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t" | |
2097 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t" | |
2098 | |
2099 "addl $8, %%eax \n\t" | |
2100 "cmpl %4, %%eax \n\t" | |
2101 " jb 1b \n\t" | |
2102 | |
2103 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | |
2104 : "memory", "%eax" | |
2105 ); | |
2106 #else | |
2107 long i; | |
2108 for(i=0; i<chromWidth; i++) | |
2109 { | |
2110 udst[i] = src[4*i+0]; | |
2111 ydst[2*i+0] = src[4*i+1]; | |
2112 vdst[i] = src[4*i+2]; | |
2113 ydst[2*i+1] = src[4*i+3]; | |
2114 } | |
2115 ydst += lumStride; | |
2116 src += srcStride; | |
2117 | |
2118 for(i=0; i<chromWidth; i++) | |
2119 { | |
2120 ydst[2*i+0] = src[4*i+1]; | |
2121 ydst[2*i+1] = src[4*i+3]; | |
2122 } | |
2123 #endif | |
2124 udst += chromStride; | |
2125 vdst += chromStride; | |
2126 ydst += lumStride; | |
2127 src += srcStride; | |
2128 } | |
2129 #ifdef HAVE_MMX | |
2130 asm volatile( EMMS" \n\t" | |
2131 SFENCE" \n\t" | |
2132 :::"memory"); | |
2133 #endif | |
2134 } | |
2135 | |
2136 /** | |
2137 * | |
2138 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a | |
2139 * problem for anyone then tell me, and I'll fix it) | |
2140 * chrominance data is only taken from every second line; others are ignored in the C version. FIXME: write HQ version | |
2141 */ | |
2142 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |
2143 long width, long height, | |
2144 long lumStride, long chromStride, long srcStride) | |
2145 { | |
2146 long y; | |
2147 const long chromWidth= width>>1; | |
2148 #ifdef HAVE_MMX | |
2149 for(y=0; y<height-2; y+=2) | |
2150 { | |
2151 long i; | |
2152 for(i=0; i<2; i++) | |
2153 { | |
2154 asm volatile( | |
2155 "mov %2, %%"REG_a" \n\t" | |
2156 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | |
2157 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
2158 "pxor %%mm7, %%mm7 \n\t" | |
19396 | 2159 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t" |
19372 | 2160 ASMALIGN(4) |
18861 | 2161 "1: \n\t" |
19396 | 2162 PREFETCH" 64(%0, %%"REG_d") \n\t" |
2163 "movd (%0, %%"REG_d"), %%mm0 \n\t" | |
2164 "movd 3(%0, %%"REG_d"), %%mm1 \n\t" | |
18861 | 2165 "punpcklbw %%mm7, %%mm0 \n\t" |
2166 "punpcklbw %%mm7, %%mm1 \n\t" | |
19396 | 2167 "movd 6(%0, %%"REG_d"), %%mm2 \n\t" |
2168 "movd 9(%0, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2169 "punpcklbw %%mm7, %%mm2 \n\t" |
2170 "punpcklbw %%mm7, %%mm3 \n\t" | |
2171 "pmaddwd %%mm6, %%mm0 \n\t" | |
2172 "pmaddwd %%mm6, %%mm1 \n\t" | |
2173 "pmaddwd %%mm6, %%mm2 \n\t" | |
2174 "pmaddwd %%mm6, %%mm3 \n\t" | |
2175 #ifndef FAST_BGR2YV12 | |
2176 "psrad $8, %%mm0 \n\t" | |
2177 "psrad $8, %%mm1 \n\t" | |
2178 "psrad $8, %%mm2 \n\t" | |
2179 "psrad $8, %%mm3 \n\t" | |
2180 #endif | |
2181 "packssdw %%mm1, %%mm0 \n\t" | |
2182 "packssdw %%mm3, %%mm2 \n\t" | |
2183 "pmaddwd %%mm5, %%mm0 \n\t" | |
2184 "pmaddwd %%mm5, %%mm2 \n\t" | |
2185 "packssdw %%mm2, %%mm0 \n\t" | |
2186 "psraw $7, %%mm0 \n\t" | |
2187 | |
19396 | 2188 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2189 "movd 15(%0, %%"REG_d"), %%mm1 \n\t" | |
18861 | 2190 "punpcklbw %%mm7, %%mm4 \n\t" |
2191 "punpcklbw %%mm7, %%mm1 \n\t" | |
19396 | 2192 "movd 18(%0, %%"REG_d"), %%mm2 \n\t" |
2193 "movd 21(%0, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2194 "punpcklbw %%mm7, %%mm2 \n\t" |
2195 "punpcklbw %%mm7, %%mm3 \n\t" | |
2196 "pmaddwd %%mm6, %%mm4 \n\t" | |
2197 "pmaddwd %%mm6, %%mm1 \n\t" | |
2198 "pmaddwd %%mm6, %%mm2 \n\t" | |
2199 "pmaddwd %%mm6, %%mm3 \n\t" | |
2200 #ifndef FAST_BGR2YV12 | |
2201 "psrad $8, %%mm4 \n\t" | |
2202 "psrad $8, %%mm1 \n\t" | |
2203 "psrad $8, %%mm2 \n\t" | |
2204 "psrad $8, %%mm3 \n\t" | |
2205 #endif | |
2206 "packssdw %%mm1, %%mm4 \n\t" | |
2207 "packssdw %%mm3, %%mm2 \n\t" | |
2208 "pmaddwd %%mm5, %%mm4 \n\t" | |
2209 "pmaddwd %%mm5, %%mm2 \n\t" | |
19396 | 2210 "add $24, %%"REG_d" \n\t" |
18861 | 2211 "packssdw %%mm2, %%mm4 \n\t" |
2212 "psraw $7, %%mm4 \n\t" | |
2213 | |
2214 "packuswb %%mm4, %%mm0 \n\t" | |
2215 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | |
2216 | |
2217 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | |
2218 "add $8, %%"REG_a" \n\t" | |
2219 " js 1b \n\t" | |
2220 : : "r" (src+width*3), "r" (ydst+width), "g" (-width) | |
19396 | 2221 : "%"REG_a, "%"REG_d |
18861 | 2222 ); |
2223 ydst += lumStride; | |
2224 src += srcStride; | |
2225 } | |
2226 src -= srcStride*2; | |
2227 asm volatile( | |
2228 "mov %4, %%"REG_a" \n\t" | |
2229 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
2230 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
2231 "pxor %%mm7, %%mm7 \n\t" | |
19396 | 2232 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t" |
2233 "add %%"REG_d", %%"REG_d" \n\t" | |
19372 | 2234 ASMALIGN(4) |
18861 | 2235 "1: \n\t" |
19396 | 2236 PREFETCH" 64(%0, %%"REG_d") \n\t" |
2237 PREFETCH" 64(%1, %%"REG_d") \n\t" | |
18861 | 2238 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
19396 | 2239 "movq (%0, %%"REG_d"), %%mm0 \n\t" |
2240 "movq (%1, %%"REG_d"), %%mm1 \n\t" | |
2241 "movq 6(%0, %%"REG_d"), %%mm2 \n\t" | |
2242 "movq 6(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2243 PAVGB" %%mm1, %%mm0 \n\t" |
2244 PAVGB" %%mm3, %%mm2 \n\t" | |
2245 "movq %%mm0, %%mm1 \n\t" | |
2246 "movq %%mm2, %%mm3 \n\t" | |
2247 "psrlq $24, %%mm0 \n\t" | |
2248 "psrlq $24, %%mm2 \n\t" | |
2249 PAVGB" %%mm1, %%mm0 \n\t" | |
2250 PAVGB" %%mm3, %%mm2 \n\t" | |
2251 "punpcklbw %%mm7, %%mm0 \n\t" | |
2252 "punpcklbw %%mm7, %%mm2 \n\t" | |
2253 #else | |
19396 | 2254 "movd (%0, %%"REG_d"), %%mm0 \n\t" |
2255 "movd (%1, %%"REG_d"), %%mm1 \n\t" | |
2256 "movd 3(%0, %%"REG_d"), %%mm2 \n\t" | |
2257 "movd 3(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2258 "punpcklbw %%mm7, %%mm0 \n\t" |
2259 "punpcklbw %%mm7, %%mm1 \n\t" | |
2260 "punpcklbw %%mm7, %%mm2 \n\t" | |
2261 "punpcklbw %%mm7, %%mm3 \n\t" | |
2262 "paddw %%mm1, %%mm0 \n\t" | |
2263 "paddw %%mm3, %%mm2 \n\t" | |
2264 "paddw %%mm2, %%mm0 \n\t" | |
19396 | 2265 "movd 6(%0, %%"REG_d"), %%mm4 \n\t" |
2266 "movd 6(%1, %%"REG_d"), %%mm1 \n\t" | |
2267 "movd 9(%0, %%"REG_d"), %%mm2 \n\t" | |
2268 "movd 9(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2269 "punpcklbw %%mm7, %%mm4 \n\t" |
2270 "punpcklbw %%mm7, %%mm1 \n\t" | |
2271 "punpcklbw %%mm7, %%mm2 \n\t" | |
2272 "punpcklbw %%mm7, %%mm3 \n\t" | |
2273 "paddw %%mm1, %%mm4 \n\t" | |
2274 "paddw %%mm3, %%mm2 \n\t" | |
2275 "paddw %%mm4, %%mm2 \n\t" | |
2276 "psrlw $2, %%mm0 \n\t" | |
2277 "psrlw $2, %%mm2 \n\t" | |
2278 #endif | |
2279 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |
2280 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
2281 | |
2282 "pmaddwd %%mm0, %%mm1 \n\t" | |
2283 "pmaddwd %%mm2, %%mm3 \n\t" | |
2284 "pmaddwd %%mm6, %%mm0 \n\t" | |
2285 "pmaddwd %%mm6, %%mm2 \n\t" | |
2286 #ifndef FAST_BGR2YV12 | |
2287 "psrad $8, %%mm0 \n\t" | |
2288 "psrad $8, %%mm1 \n\t" | |
2289 "psrad $8, %%mm2 \n\t" | |
2290 "psrad $8, %%mm3 \n\t" | |
2291 #endif | |
2292 "packssdw %%mm2, %%mm0 \n\t" | |
2293 "packssdw %%mm3, %%mm1 \n\t" | |
2294 "pmaddwd %%mm5, %%mm0 \n\t" | |
2295 "pmaddwd %%mm5, %%mm1 \n\t" | |
2296 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
2297 "psraw $7, %%mm0 \n\t" | |
2298 | |
2299 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
19396 | 2300 "movq 12(%0, %%"REG_d"), %%mm4 \n\t" |
2301 "movq 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2302 "movq 18(%0, %%"REG_d"), %%mm2 \n\t" | |
2303 "movq 18(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2304 PAVGB" %%mm1, %%mm4 \n\t" |
2305 PAVGB" %%mm3, %%mm2 \n\t" | |
2306 "movq %%mm4, %%mm1 \n\t" | |
2307 "movq %%mm2, %%mm3 \n\t" | |
2308 "psrlq $24, %%mm4 \n\t" | |
2309 "psrlq $24, %%mm2 \n\t" | |
2310 PAVGB" %%mm1, %%mm4 \n\t" | |
2311 PAVGB" %%mm3, %%mm2 \n\t" | |
2312 "punpcklbw %%mm7, %%mm4 \n\t" | |
2313 "punpcklbw %%mm7, %%mm2 \n\t" | |
2314 #else | |
19396 | 2315 "movd 12(%0, %%"REG_d"), %%mm4 \n\t" |
2316 "movd 12(%1, %%"REG_d"), %%mm1 \n\t" | |
2317 "movd 15(%0, %%"REG_d"), %%mm2 \n\t" | |
2318 "movd 15(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2319 "punpcklbw %%mm7, %%mm4 \n\t" |
2320 "punpcklbw %%mm7, %%mm1 \n\t" | |
2321 "punpcklbw %%mm7, %%mm2 \n\t" | |
2322 "punpcklbw %%mm7, %%mm3 \n\t" | |
2323 "paddw %%mm1, %%mm4 \n\t" | |
2324 "paddw %%mm3, %%mm2 \n\t" | |
2325 "paddw %%mm2, %%mm4 \n\t" | |
19396 | 2326 "movd 18(%0, %%"REG_d"), %%mm5 \n\t" |
2327 "movd 18(%1, %%"REG_d"), %%mm1 \n\t" | |
2328 "movd 21(%0, %%"REG_d"), %%mm2 \n\t" | |
2329 "movd 21(%1, %%"REG_d"), %%mm3 \n\t" | |
18861 | 2330 "punpcklbw %%mm7, %%mm5 \n\t" |
2331 "punpcklbw %%mm7, %%mm1 \n\t" | |
2332 "punpcklbw %%mm7, %%mm2 \n\t" | |
2333 "punpcklbw %%mm7, %%mm3 \n\t" | |
2334 "paddw %%mm1, %%mm5 \n\t" | |
2335 "paddw %%mm3, %%mm2 \n\t" | |
2336 "paddw %%mm5, %%mm2 \n\t" | |
2337 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
2338 "psrlw $2, %%mm4 \n\t" | |
2339 "psrlw $2, %%mm2 \n\t" | |
2340 #endif | |
2341 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" | |
2342 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
2343 | |
2344 "pmaddwd %%mm4, %%mm1 \n\t" | |
2345 "pmaddwd %%mm2, %%mm3 \n\t" | |
2346 "pmaddwd %%mm6, %%mm4 \n\t" | |
2347 "pmaddwd %%mm6, %%mm2 \n\t" | |
2348 #ifndef FAST_BGR2YV12 | |
2349 "psrad $8, %%mm4 \n\t" | |
2350 "psrad $8, %%mm1 \n\t" | |
2351 "psrad $8, %%mm2 \n\t" | |
2352 "psrad $8, %%mm3 \n\t" | |
2353 #endif | |
2354 "packssdw %%mm2, %%mm4 \n\t" | |
2355 "packssdw %%mm3, %%mm1 \n\t" | |
2356 "pmaddwd %%mm5, %%mm4 \n\t" | |
2357 "pmaddwd %%mm5, %%mm1 \n\t" | |
19396 | 2358 "add $24, %%"REG_d" \n\t" |
18861 | 2359 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 |
2360 "psraw $7, %%mm4 \n\t" | |
2361 | |
2362 "movq %%mm0, %%mm1 \n\t" | |
2363 "punpckldq %%mm4, %%mm0 \n\t" | |
2364 "punpckhdq %%mm4, %%mm1 \n\t" | |
2365 "packsswb %%mm1, %%mm0 \n\t" | |
2366 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" | |
2367 "movd %%mm0, (%2, %%"REG_a") \n\t" | |
2368 "punpckhdq %%mm0, %%mm0 \n\t" | |
2369 "movd %%mm0, (%3, %%"REG_a") \n\t" | |
2370 "add $4, %%"REG_a" \n\t" | |
2371 " js 1b \n\t" | |
2372 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) | |
19396 | 2373 : "%"REG_a, "%"REG_d |
18861 | 2374 ); |
2375 | |
2376 udst += chromStride; | |
2377 vdst += chromStride; | |
2378 src += srcStride*2; | |
2379 } | |
2380 | |
2381 asm volatile( EMMS" \n\t" | |
2382 SFENCE" \n\t" | |
2383 :::"memory"); | |
2384 #else | |
2385 y=0; | |
2386 #endif | |
2387 for(; y<height; y+=2) | |
2388 { | |
2389 long i; | |
2390 for(i=0; i<chromWidth; i++) | |
2391 { | |
2392 unsigned int b= src[6*i+0]; | |
2393 unsigned int g= src[6*i+1]; | |
2394 unsigned int r= src[6*i+2]; | |
2395 | |
2396 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2397 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; | |
2398 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; | |
2399 | |
2400 udst[i] = U; | |
2401 vdst[i] = V; | |
2402 ydst[2*i] = Y; | |
2403 | |
2404 b= src[6*i+3]; | |
2405 g= src[6*i+4]; | |
2406 r= src[6*i+5]; | |
2407 | |
2408 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2409 ydst[2*i+1] = Y; | |
2410 } | |
2411 ydst += lumStride; | |
2412 src += srcStride; | |
2413 | |
2414 for(i=0; i<chromWidth; i++) | |
2415 { | |
2416 unsigned int b= src[6*i+0]; | |
2417 unsigned int g= src[6*i+1]; | |
2418 unsigned int r= src[6*i+2]; | |
2419 | |
2420 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2421 | |
2422 ydst[2*i] = Y; | |
2423 | |
2424 b= src[6*i+3]; | |
2425 g= src[6*i+4]; | |
2426 r= src[6*i+5]; | |
2427 | |
2428 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2429 ydst[2*i+1] = Y; | |
2430 } | |
2431 udst += chromStride; | |
2432 vdst += chromStride; | |
2433 ydst += lumStride; | |
2434 src += srcStride; | |
2435 } | |
2436 } | |
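The C fallback above relies on fixed-point coefficients (RY, GY, BY, ..., RGB2YUV_SHIFT) defined elsewhere in swscale. A hedged standalone sketch of the same idea using the usual BT.601 studio-swing luma weights (0.257, 0.504, 0.098) scaled by 2^8; the exact constants used by swscale are assumptions here and may be defined slightly differently.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: fixed-point RGB -> Y with assumed BT.601 weights.
 * The real RY/GY/BY/RGB2YUV_SHIFT live elsewhere in swscale. */
#define SHIFT 8
#define RY_ ((int)(0.257 * (1 << SHIFT) + 0.5))
#define GY_ ((int)(0.504 * (1 << SHIFT) + 0.5))
#define BY_ ((int)(0.098 * (1 << SHIFT) + 0.5))

static uint8_t rgb_to_y(uint8_t r, uint8_t g, uint8_t b)
{
    return (uint8_t)(((RY_*r + GY_*g + BY_*b) >> SHIFT) + 16);
}

int main(void)
{
    /* Studio swing: black maps to 16, white close to 235. */
    printf("Y(black)=%d Y(white)=%d\n",
           rgb_to_y(0, 0, 0), rgb_to_y(255, 255, 255));
    return 0;
}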
2437 | |
2438 void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | |
2439 long width, long height, long src1Stride, | |
2440 long src2Stride, long dstStride){ | |
2441 long h; | |
2442 | |
2443 for(h=0; h < height; h++) | |
2444 { | |
2445 long w; | |
2446 | |
2447 #ifdef HAVE_MMX | |
2448 #ifdef HAVE_SSE2 | |
2449 asm( | |
2450 "xor %%"REG_a", %%"REG_a" \n\t" | |
2451 "1: \n\t" | |
2452 PREFETCH" 64(%1, %%"REG_a") \n\t" | |
2453 PREFETCH" 64(%2, %%"REG_a") \n\t" | |
2454 "movdqa (%1, %%"REG_a"), %%xmm0 \n\t" | |
2455 "movdqa (%1, %%"REG_a"), %%xmm1 \n\t" | |
2456 "movdqa (%2, %%"REG_a"), %%xmm2 \n\t" | |
2457 "punpcklbw %%xmm2, %%xmm0 \n\t" | |
2458 "punpckhbw %%xmm2, %%xmm1 \n\t" | |
2459 "movntdq %%xmm0, (%0, %%"REG_a", 2)\n\t" | |
2460 "movntdq %%xmm1, 16(%0, %%"REG_a", 2)\n\t" | |
2461 "add $16, %%"REG_a" \n\t" | |
2462 "cmp %3, %%"REG_a" \n\t" | |
2463 " jb 1b \n\t" | |
2464 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |
2465 : "memory", "%"REG_a"" | |
2466 ); | |
2467 #else | |
2468 asm( | |
2469 "xor %%"REG_a", %%"REG_a" \n\t" | |
2470 "1: \n\t" | |
2471 PREFETCH" 64(%1, %%"REG_a") \n\t" | |
2472 PREFETCH" 64(%2, %%"REG_a") \n\t" | |
2473 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
2474 "movq 8(%1, %%"REG_a"), %%mm2 \n\t" | |
2475 "movq %%mm0, %%mm1 \n\t" | |
2476 "movq %%mm2, %%mm3 \n\t" | |
2477 "movq (%2, %%"REG_a"), %%mm4 \n\t" | |
2478 "movq 8(%2, %%"REG_a"), %%mm5 \n\t" | |
2479 "punpcklbw %%mm4, %%mm0 \n\t" | |
2480 "punpckhbw %%mm4, %%mm1 \n\t" | |
2481 "punpcklbw %%mm5, %%mm2 \n\t" | |
2482 "punpckhbw %%mm5, %%mm3 \n\t" | |
2483 MOVNTQ" %%mm0, (%0, %%"REG_a", 2)\n\t" | |
2484 MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2)\n\t" | |
2485 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2)\n\t" | |
2486 MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2)\n\t" | |
2487 "add $16, %%"REG_a" \n\t" | |
2488 "cmp %3, %%"REG_a" \n\t" | |
2489 " jb 1b \n\t" | |
2490 ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) | |
2491 : "memory", "%"REG_a | |
2492 ); | |
2493 #endif | |
2494 for(w= (width&(~15)); w < width; w++) | |
2495 { | |
2496 dest[2*w+0] = src1[w]; | |
2497 dest[2*w+1] = src2[w]; | |
2498 } | |
2499 #else | |
2500 for(w=0; w < width; w++) | |
2501 { | |
2502 dest[2*w+0] = src1[w]; | |
2503 dest[2*w+1] = src2[w]; | |
2504 } | |
2505 #endif | |
2506 dest += dstStride; | |
2507 src1 += src1Stride; | |
2508 src2 += src2Stride; | |
2509 } | |
2510 #ifdef HAVE_MMX | |
2511 asm( | |
2512 EMMS" \n\t" | |
2513 SFENCE" \n\t" | |
2514 ::: "memory" | |
2515 ); | |
2516 #endif | |
2517 } | |
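interleaveBytes merges two planes byte by byte (dest[2*w] = src1[w], dest[2*w+1] = src2[w]); interleaving a U and a V plane this way, for instance, yields an NV12-style packed chroma plane. A hedged usage sketch with illustrative names:

#include <stdint.h>

/* Illustrative only: wrap a routine with the interleaveBytes signature
 * above to build a packed UVUV plane from separate U and V planes. */
typedef void (*interleave_fn)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
                              long width, long height, long src1Stride,
                              long src2Stride, long dstStride);

static void pack_uv(interleave_fn interleave, uint8_t *u, uint8_t *v,
                    uint8_t *uv, long chromaWidth, long chromaHeight)
{
    /* Each output row holds twice as many bytes as an input row. */
    interleave(u, v, uv, chromaWidth, chromaHeight,
               chromaWidth, chromaWidth, chromaWidth * 2);
}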
2518 | |
2519 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, | |
2520 uint8_t *dst1, uint8_t *dst2, | |
2521 long width, long height, | |
2522 long srcStride1, long srcStride2, | |
2523 long dstStride1, long dstStride2) | |
2524 { | |
2525 long y,x,w,h; | |
2526 w=width/2; h=height/2; | |
2527 #ifdef HAVE_MMX | |
2528 asm volatile( | |
2529 PREFETCH" %0\n\t" | |
2530 PREFETCH" %1\n\t" | |
2531 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); | |
2532 #endif | |
2533 for(y=0;y<h;y++){ | |
2534 const uint8_t* s1=src1+srcStride1*(y>>1); | |
2535 uint8_t* d=dst1+dstStride1*y; | |
2536 x=0; | |
2537 #ifdef HAVE_MMX | |
2538 for(;x<w-31;x+=32) | |
2539 { | |
2540 asm volatile( | |
2541 PREFETCH" 32%1\n\t" | |
2542 "movq %1, %%mm0\n\t" | |
2543 "movq 8%1, %%mm2\n\t" | |
2544 "movq 16%1, %%mm4\n\t" | |
2545 "movq 24%1, %%mm6\n\t" | |
2546 "movq %%mm0, %%mm1\n\t" | |
2547 "movq %%mm2, %%mm3\n\t" | |
2548 "movq %%mm4, %%mm5\n\t" | |
2549 "movq %%mm6, %%mm7\n\t" | |
2550 "punpcklbw %%mm0, %%mm0\n\t" | |
2551 "punpckhbw %%mm1, %%mm1\n\t" | |
2552 "punpcklbw %%mm2, %%mm2\n\t" | |
2553 "punpckhbw %%mm3, %%mm3\n\t" | |
2554 "punpcklbw %%mm4, %%mm4\n\t" | |
2555 "punpckhbw %%mm5, %%mm5\n\t" | |
2556 "punpcklbw %%mm6, %%mm6\n\t" | |
2557 "punpckhbw %%mm7, %%mm7\n\t" | |
2558 MOVNTQ" %%mm0, %0\n\t" | |
2559 MOVNTQ" %%mm1, 8%0\n\t" | |
2560 MOVNTQ" %%mm2, 16%0\n\t" | |
2561 MOVNTQ" %%mm3, 24%0\n\t" | |
2562 MOVNTQ" %%mm4, 32%0\n\t" | |
2563 MOVNTQ" %%mm5, 40%0\n\t" | |
2564 MOVNTQ" %%mm6, 48%0\n\t" | |
2565 MOVNTQ" %%mm7, 56%0" | |
2566 :"=m"(d[2*x]) | |
2567 :"m"(s1[x]) | |
2568 :"memory"); | |
2569 } | |
2570 #endif | |
2571 for(;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; | |
2572 } | |
2573 for(y=0;y<h;y++){ | |
2574 const uint8_t* s2=src2+srcStride2*(y>>1); | |
2575 uint8_t* d=dst2+dstStride2*y; | |
2576 x=0; | |
2577 #ifdef HAVE_MMX | |
2578 for(;x<w-31;x+=32) | |
2579 { | |
2580 asm volatile( | |
2581 PREFETCH" 32%1\n\t" | |
2582 "movq %1, %%mm0\n\t" | |
2583 "movq 8%1, %%mm2\n\t" | |
2584 "movq 16%1, %%mm4\n\t" | |
2585 "movq 24%1, %%mm6\n\t" | |
2586 "movq %%mm0, %%mm1\n\t" | |
2587 "movq %%mm2, %%mm3\n\t" | |
2588 "movq %%mm4, %%mm5\n\t" | |
2589 "movq %%mm6, %%mm7\n\t" | |
2590 "punpcklbw %%mm0, %%mm0\n\t" | |
2591 "punpckhbw %%mm1, %%mm1\n\t" | |
2592 "punpcklbw %%mm2, %%mm2\n\t" | |
2593 "punpckhbw %%mm3, %%mm3\n\t" | |
2594 "punpcklbw %%mm4, %%mm4\n\t" | |
2595 "punpckhbw %%mm5, %%mm5\n\t" | |
2596 "punpcklbw %%mm6, %%mm6\n\t" | |
2597 "punpckhbw %%mm7, %%mm7\n\t" | |
2598 MOVNTQ" %%mm0, %0\n\t" | |
2599 MOVNTQ" %%mm1, 8%0\n\t" | |
2600 MOVNTQ" %%mm2, 16%0\n\t" | |
2601 MOVNTQ" %%mm3, 24%0\n\t" | |
2602 MOVNTQ" %%mm4, 32%0\n\t" | |
2603 MOVNTQ" %%mm5, 40%0\n\t" | |
2604 MOVNTQ" %%mm6, 48%0\n\t" | |
2605 MOVNTQ" %%mm7, 56%0" | |
2606 :"=m"(d[2*x]) | |
2607 :"m"(s2[x]) | |
2608 :"memory"); | |
2609 } | |
2610 #endif | |
2611 for(;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; | |
2612 } | |
2613 #ifdef HAVE_MMX | |
2614 asm( | |
2615 EMMS" \n\t" | |
2616 SFENCE" \n\t" | |
2617 ::: "memory" | |
2618 ); | |
2619 #endif | |
2620 } | |
2621 | |
2622 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, | |
2623 uint8_t *dst, | |
2624 long width, long height, | |
2625 long srcStride1, long srcStride2, | |
2626 long srcStride3, long dstStride) | |
2627 { | |
2628 long y,x,w,h; | |
2629 w=width/2; h=height; | |
2630 for(y=0;y<h;y++){ | |
2631 const uint8_t* yp=src1+srcStride1*y; | |
2632 const uint8_t* up=src2+srcStride2*(y>>2); | |
2633 const uint8_t* vp=src3+srcStride3*(y>>2); | |
2634 uint8_t* d=dst+dstStride*y; | |
2635 x=0; | |
2636 #ifdef HAVE_MMX | |
2637 for(;x<w-7;x+=8) | |
2638 { | |
2639 asm volatile( | |
2640 PREFETCH" 32(%1, %0)\n\t" | |
2641 PREFETCH" 32(%2, %0)\n\t" | |
2642 PREFETCH" 32(%3, %0)\n\t" | |
2643 "movq (%1, %0, 4), %%mm0\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | |
2644 "movq (%2, %0), %%mm1\n\t" /* U0U1U2U3U4U5U6U7 */ | |
2645 "movq (%3, %0), %%mm2\n\t" /* V0V1V2V3V4V5V6V7 */ | |
2646 "movq %%mm0, %%mm3\n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | |
2647 "movq %%mm1, %%mm4\n\t" /* U0U1U2U3U4U5U6U7 */ | |
2648 "movq %%mm2, %%mm5\n\t" /* V0V1V2V3V4V5V6V7 */ | |
2649 "punpcklbw %%mm1, %%mm1\n\t" /* U0U0 U1U1 U2U2 U3U3 */ | |
2650 "punpcklbw %%mm2, %%mm2\n\t" /* V0V0 V1V1 V2V2 V3V3 */ | |
2651 "punpckhbw %%mm4, %%mm4\n\t" /* U4U4 U5U5 U6U6 U7U7 */ | |
2652 "punpckhbw %%mm5, %%mm5\n\t" /* V4V4 V5V5 V6V6 V7V7 */ | |
2653 | |
2654 "movq %%mm1, %%mm6\n\t" | |
2655 "punpcklbw %%mm2, %%mm1\n\t" /* U0V0 U0V0 U1V1 U1V1*/ | |
2656 "punpcklbw %%mm1, %%mm0\n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/ | |
2657 "punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ | |
2658 MOVNTQ" %%mm0, (%4, %0, 8)\n\t" | |
2659 MOVNTQ" %%mm3, 8(%4, %0, 8)\n\t" | |
2660 | |
2661 "punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/ | |
2662 "movq 8(%1, %0, 4), %%mm0\n\t" | |
2663 "movq %%mm0, %%mm3\n\t" | |
2664 "punpcklbw %%mm6, %%mm0\n\t" /* Y U2 Y V2 Y U2 Y V2*/ | |
2665 "punpckhbw %%mm6, %%mm3\n\t" /* Y U3 Y V3 Y U3 Y V3*/ | |
2666 MOVNTQ" %%mm0, 16(%4, %0, 8)\n\t" | |
2667 MOVNTQ" %%mm3, 24(%4, %0, 8)\n\t" | |
2668 | |
2669 "movq %%mm4, %%mm6\n\t" | |
2670 "movq 16(%1, %0, 4), %%mm0\n\t" | |
2671 "movq %%mm0, %%mm3\n\t" | |
2672 "punpcklbw %%mm5, %%mm4\n\t" | |
2673 "punpcklbw %%mm4, %%mm0\n\t" /* Y U4 Y V4 Y U4 Y V4*/ | |
2674 "punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/ | |
2675 MOVNTQ" %%mm0, 32(%4, %0, 8)\n\t" | |
2676 MOVNTQ" %%mm3, 40(%4, %0, 8)\n\t" | |
2677 | |
2678 "punpckhbw %%mm5, %%mm6\n\t" | |
2679 "movq 24(%1, %0, 4), %%mm0\n\t" | |
2680 "movq %%mm0, %%mm3\n\t" | |
2681 "punpcklbw %%mm6, %%mm0\n\t" /* Y U6 Y V6 Y U6 Y V6*/ | |
2682 "punpckhbw %%mm6, %%mm3\n\t" /* Y U7 Y V7 Y U7 Y V7*/ | |
2683 MOVNTQ" %%mm0, 48(%4, %0, 8)\n\t" | |
2684 MOVNTQ" %%mm3, 56(%4, %0, 8)\n\t" | |
2685 | |
2686 : "+r" (x) | |
2687 : "r"(yp), "r" (up), "r"(vp), "r"(d) | |
2688 :"memory"); | |
2689 } | |
2690 #endif | |
2691 for(; x<w; x++) | |
2692 { | |
2693 const long x2= x<<2; | |
2694 d[8*x+0]=yp[x2]; | |
2695 d[8*x+1]=up[x]; | |
2696 d[8*x+2]=yp[x2+1]; | |
2697 d[8*x+3]=vp[x]; | |
2698 d[8*x+4]=yp[x2+2]; | |
2699 d[8*x+5]=up[x]; | |
2700 d[8*x+6]=yp[x2+3]; | |
2701 d[8*x+7]=vp[x]; | |
2702 } | |
2703 } | |
2704 #ifdef HAVE_MMX | |
2705 asm( | |
2706 EMMS" \n\t" | |
2707 SFENCE" \n\t" | |
2708 ::: "memory" | |
2709 ); | |
2710 #endif | |
2711 } | |
22960 | 2712 |
2713 static inline void RENAME(rgb2rgb_init)(void){ | |
2714 rgb15to16= RENAME(rgb15to16); | |
2715 rgb15to24= RENAME(rgb15to24); | |
2716 rgb15to32= RENAME(rgb15to32); | |
2717 rgb16to24= RENAME(rgb16to24); | |
2718 rgb16to32= RENAME(rgb16to32); | |
2719 rgb16to15= RENAME(rgb16to15); | |
2720 rgb24to16= RENAME(rgb24to16); | |
2721 rgb24to15= RENAME(rgb24to15); | |
2722 rgb24to32= RENAME(rgb24to32); | |
2723 rgb32to16= RENAME(rgb32to16); | |
2724 rgb32to15= RENAME(rgb32to15); | |
2725 rgb32to24= RENAME(rgb32to24); | |
2726 rgb24tobgr15= RENAME(rgb24tobgr15); | |
2727 rgb24tobgr16= RENAME(rgb24tobgr16); | |
2728 rgb24tobgr24= RENAME(rgb24tobgr24); | |
2729 rgb32tobgr32= RENAME(rgb32tobgr32); | |
2730 rgb32tobgr16= RENAME(rgb32tobgr16); | |
2731 rgb32tobgr15= RENAME(rgb32tobgr15); | |
2732 yv12toyuy2= RENAME(yv12toyuy2); | |
2733 yv12touyvy= RENAME(yv12touyvy); | |
2734 yuv422ptoyuy2= RENAME(yuv422ptoyuy2); | |
2735 yuy2toyv12= RENAME(yuy2toyv12); | |
2736 // uyvytoyv12= RENAME(uyvytoyv12); | |
2737 // yvu9toyv12= RENAME(yvu9toyv12); | |
2738 planar2x= RENAME(planar2x); | |
2739 rgb24toyv12= RENAME(rgb24toyv12); | |
2740 interleaveBytes= RENAME(interleaveBytes); | |
2741 vu9_to_vu12= RENAME(vu9_to_vu12); | |
2742 yvu9_to_yuy2= RENAME(yvu9_to_yuy2); | |
2743 } |
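rgb2rgb_init is the glue between this template and the rest of swscale: the file is compiled once per CPU flavor with a different RENAME() expansion, and the init code in rgb2rgb.c picks one flavor and assigns its functions to the global pointers listed above. A hedged, toy-sized illustration of that pattern (the names below are generic, not the actual swscale macros):

#include <stdio.h>

/* Illustrative only: compile the same body under two RENAME() expansions
 * and choose one implementation at runtime through a function pointer. */
static int (*add)(int a, int b);

#define RENAME(a) a ## _C
static int RENAME(add)(int a, int b) { return a + b; }
#undef RENAME

#define RENAME(a) a ## _fast
static int RENAME(add)(int a, int b) { return a + b; /* imagine SIMD here */ }
#undef RENAME

static void toy_init(int have_fast_cpu)
{
    add = have_fast_cpu ? add_fast : add_C;
}

int main(void)
{
    toy_init(1);
    printf("%d\n", add(2, 3));
    return 0;
}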