diff postproc/swscale_template.c @ 4612:4edfdec1dc12

bgr24toY in MMX
author michael
date Sat, 09 Feb 2002 17:03:53 +0000
parents f01ed4dfa868
children ef213d64e20a
line wrap: on
line diff
--- a/postproc/swscale_template.c	Sat Feb 09 15:14:48 2002 +0000
+++ b/postproc/swscale_template.c	Sat Feb 09 17:03:53 2002 +0000
@@ -1635,7 +1635,76 @@
 
 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width)
 {
-#ifdef HAVE_MMXFIXME
+#ifdef HAVE_MMX
+	asm volatile(
+		"movl %2, %%eax			\n\t"
+		"movq bgr2YCoeff, %%mm6		\n\t"
+		"movq w1111, %%mm5		\n\t"
+		"pxor %%mm7, %%mm7		\n\t"
+		"leal (%%eax, %%eax, 2), %%ebx	\n\t"
+		".balign 16			\n\t"
+		"1:				\n\t"
+		PREFETCH" 64(%0, %%ebx)		\n\t"
+		"movd (%0, %%ebx), %%mm0	\n\t"
+		"movd 3(%0, %%ebx), %%mm1	\n\t"
+		"punpcklbw %%mm7, %%mm0		\n\t"
+		"punpcklbw %%mm7, %%mm1		\n\t"
+		"movd 6(%0, %%ebx), %%mm2	\n\t"
+		"movd 9(%0, %%ebx), %%mm3	\n\t"
+		"punpcklbw %%mm7, %%mm2		\n\t"
+		"punpcklbw %%mm7, %%mm3		\n\t"
+		"pmaddwd %%mm6, %%mm0		\n\t"
+		"pmaddwd %%mm6, %%mm1		\n\t"
+		"pmaddwd %%mm6, %%mm2		\n\t"
+		"pmaddwd %%mm6, %%mm3		\n\t"
+#ifndef FAST_BGR2YV12
+		"psrad $8, %%mm0		\n\t"
+		"psrad $8, %%mm1		\n\t"
+		"psrad $8, %%mm2		\n\t"
+		"psrad $8, %%mm3		\n\t"
+#endif
+		"packssdw %%mm1, %%mm0		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"pmaddwd %%mm5, %%mm0		\n\t"
+		"pmaddwd %%mm5, %%mm2		\n\t"
+		"packssdw %%mm2, %%mm0		\n\t"
+		"psraw $7, %%mm0		\n\t"
+
+		"movd 12(%0, %%ebx), %%mm4	\n\t"
+		"movd 15(%0, %%ebx), %%mm1	\n\t"
+		"punpcklbw %%mm7, %%mm4		\n\t"
+		"punpcklbw %%mm7, %%mm1		\n\t"
+		"movd 18(%0, %%ebx), %%mm2	\n\t"
+		"movd 21(%0, %%ebx), %%mm3	\n\t"
+		"punpcklbw %%mm7, %%mm2		\n\t"
+		"punpcklbw %%mm7, %%mm3		\n\t"
+		"pmaddwd %%mm6, %%mm4		\n\t"
+		"pmaddwd %%mm6, %%mm1		\n\t"
+		"pmaddwd %%mm6, %%mm2		\n\t"
+		"pmaddwd %%mm6, %%mm3		\n\t"
+#ifndef FAST_BGR2YV12
+		"psrad $8, %%mm4		\n\t"
+		"psrad $8, %%mm1		\n\t"
+		"psrad $8, %%mm2		\n\t"
+		"psrad $8, %%mm3		\n\t"
+#endif
+		"packssdw %%mm1, %%mm4		\n\t"
+		"packssdw %%mm3, %%mm2		\n\t"
+		"pmaddwd %%mm5, %%mm4		\n\t"
+		"pmaddwd %%mm5, %%mm2		\n\t"
+		"addl $24, %%ebx		\n\t"
+		"packssdw %%mm2, %%mm4		\n\t"
+		"psraw $7, %%mm4		\n\t"
+
+		"packuswb %%mm4, %%mm0		\n\t"
+		"paddusb bgr2YOffset, %%mm0	\n\t"
+
+		MOVNTQ(%%mm0, (%1, %%eax))
+		"addl $8, %%eax			\n\t"
+		" js 1b				\n\t"
+		: : "r" (src+width*3), "r" (dst+width), "g" (-width)
+		: "%eax", "%ebx"
+	);
 #else
 	int i;
 	for(i=0; i<width; i++)