view libvo/osd_template.c @ 22830:1d4a455af876

Set CONFIG_EBP_AVAILABLE, CONFIG_EBX_AVAILABLE for FFmpeg. After FFmpeg r8549 these variables are used in libavcodec to determine whether x86 inline asm sections that use these registers, or that require a certain total number of free registers, are enabled. Because they were not set by MPlayer's configure, some H264 decoding optimizations were disabled after that FFmpeg version. This change sets the variables to true unconditionally, which should restore the previous behavior. Adding proper detection is left for later. EBX should always be available because the internal libavcodec is never compiled with PIC. However, if -fomit-frame-pointer is not used because of --enable-debug, then EBP is not available. Thus proper detection would be preferable to fix compilation with --enable-debug on x86. Currently the variables are also set on non-x86, which should be harmless even if somewhat ugly.
author uau
date Fri, 30 Mar 2007 22:57:04 +0000
parents 9d42ff736ea5
children 7a1397677cb3
line wrap: on
line source

// Generic alpha renderers for all YUV modes and RGB depths.
// Optimized by Nick and Michael
// Code from Michael Niedermayer (michaelni@gmx.at) is under GPL

// Instruction mnemonics for the instruction set this template is being
// instantiated for.  They are pasted into inline-asm strings directly in
// front of a memory operand, e.g.  PREFETCH" %1\n\t".
#undef PREFETCH
#undef EMMS
#undef PREFETCHW
#undef PAVGB

#ifdef HAVE_3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB	  "pavgusb"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB	  "pavgb"
#else
/* No prefetch instruction: turn the rest of the asm line into a comment.
 * '#' is used instead of '/' because GNU as treats '/' as a comment
 * character only when it runs without --divide, and not on every target. */
#define PREFETCH " # nop"
#define PREFETCHW " # nop"
#endif

#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

/*
 * Blend an OSD bitmap onto a plane that has one byte per pixel.
 *
 *   w, h       size of the area in pixels
 *   src        OSD values, one byte per pixel
 *   srca       OSD alpha, one byte per pixel
 *   srcstride  distance in bytes between rows of both src and srca
 *   dstbase    first destination byte, modified in place
 *   dststride  distance in bytes between destination rows
 *
 * Plain C path: every pixel with srca != 0 becomes
 * ((dst * srca) >> 8) + src; pixels with srca == 0 are left untouched.
 * With FAST_OSD (and no MMX) src is copied without blending instead.
 * MMX path: works on groups of 8 pixels, skips a group only when all 8 alpha
 * bytes are zero and multiplies by the byte (srca + 0xFF).  It always handles
 * whole groups, so it can touch up to 7 bytes past w in a row (presumably the
 * buffers are padded for that -- confirm with the callers).
 */
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !defined(HAVE_MMX)
    // The FAST_OSD loop below handles two pixels per iteration.
    w=w>>1;
#endif
#ifdef HAVE_MMX
    // Constants that stay in MMX registers for the whole function:
    // mm7 = FF..FF, mm5 = FF00 in every word, mm4 = 00FF in every word.
    asm volatile(
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "movq %%mm5, %%mm7\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#ifdef HAVE_MMX
    asm volatile(
	PREFETCHW" %0\n\t"
	PREFETCH" %1\n\t"
	PREFETCH" %2\n\t"
	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=8){
	asm volatile(
		// Skip the group if all 8 alpha bytes are zero.
		"movl %1, %%eax\n\t"
		"orl 4%1, %%eax\n\t"
		" jz 1f\n\t"
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq	%0, %%mm0\n\t" // dstbase
		"movq	%%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" 	//0Y0Y0Y0Y
		"psrlw $8, %%mm1\n\t"		//0Y0Y0Y0Y
		"movq	%1, %%mm2\n\t" 		//srca HGFEDCBA
		"paddb	%%mm7, %%mm2\n\t"
		"movq %%mm2, %%mm3\n\t"
		"pand %%mm4, %%mm2\n\t" 	//0G0E0C0A
		"psrlw $8, %%mm3\n\t"		//0H0F0D0B
		"pmullw	%%mm2, %%mm0\n\t"
		"pmullw	%%mm3, %%mm1\n\t"
		"psrlw	$8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"por %%mm1, %%mm0\n\t"
		"paddb	%2, %%mm0\n\t"
		"movq	%%mm0, %0\n\t"
		"1:\n\t"
		:: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
		// The asm stores 8 bytes to %0 although it is only listed as an
		// input, so the compiler has to be told that memory changes.
		: "%eax", "memory");
	}
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
#else
            if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
	// Leave MMX state so that later floating point code works.
	asm volatile(EMMS:::"memory");
#endif
    return;
}

/*
 * Blend an OSD bitmap onto a packed surface with two bytes per pixel where
 * the even byte of every pixel is blended with the OSD value (the name
 * suggests YUY2, i.e. luma in the even bytes -- confirm with the callers).
 *
 *   w, h       size of the area in pixels
 *   src        OSD values, one byte per pixel
 *   srca       OSD alpha, one byte per pixel
 *   srcstride  distance in bytes between rows of both src and srca
 *   dstbase    first destination byte, modified in place
 *   dststride  distance in bytes between destination rows
 *
 * Plain C path: for srca != 0 the even byte becomes
 * ((dst * srca) >> 8) + src and the odd byte is scaled towards 128 by srca.
 * With FAST_OSD (and no MMX) src is copied into the even bytes unblended.
 * MMX path: works on groups of 4 pixels, skips a group only when all 4 alpha
 * bytes are zero, multiplies by the byte (srca + 0xFF) and keeps the odd
 * bytes unchanged.  It always handles whole groups of 4 pixels.
 */
static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !defined(HAVE_MMX)
    // The FAST_OSD loop below handles two pixels per iteration.
    w=w>>1;
#endif
#ifdef HAVE_MMX
    // Constants that stay in MMX registers for the whole function:
    // mm7 = 0, mm6 = FF..FF, mm5 = FF00 per word, mm4 = 00FF per word.
    asm volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm6\n\t"
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#ifdef HAVE_MMX
    // "memory" clobber added for consistency with the other renderers.
    asm volatile(
	PREFETCHW" %0\n\t"
	PREFETCH" %1\n\t"
	PREFETCH" %2\n\t"
	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=4){
	asm volatile(
		// Skip the group if all 4 alpha bytes are zero.
		"movl %1, %%eax\n\t"
		"orl %%eax, %%eax\n\t"
		" jz 1f\n\t"
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq	%0, %%mm0\n\t" // dstbase
		"movq	%%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" 	//0Y0Y0Y0Y
		"movd	%%eax, %%mm2\n\t"	//srca 0000DCBA
		"paddb	%%mm6, %%mm2\n\t"
		"punpcklbw %%mm7, %%mm2\n\t"	//srca 0D0C0B0A
		"pmullw	%%mm2, %%mm0\n\t"
		"psrlw	$8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t" 	//U0V0U0V0
		"movd %2, %%mm2\n\t"		//src 0000DCBA
		"punpcklbw %%mm7, %%mm2\n\t"	//src 0D0C0B0A
		"por %%mm1, %%mm0\n\t"
		"paddb	%%mm2, %%mm0\n\t"
		"movq	%%mm0, %0\n\t"
		"1:\n\t"
		:: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
		// The asm stores 8 bytes to %0 although it is only listed as an
		// input, so the compiler has to be told that memory changes.
		: "%eax", "memory");
	}
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
#else
            if(srca[x]) {
               dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
               // Scale the distance of the odd byte from 128.
               dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
           }
#endif
        }
#endif
	src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
	// Leave MMX state so that later floating point code works.
	asm volatile(EMMS:::"memory");
#endif
    return;
}

/*
 * Blend an OSD bitmap onto a packed surface with two bytes per pixel where
 * the odd byte of every pixel is blended with the OSD value and the even
 * byte is scaled towards 128 (byte-swapped counterpart of the yuy2 renderer).
 *
 *   w, h       size of the area in pixels
 *   src        OSD values, one byte per pixel
 *   srca       OSD alpha, one byte per pixel; 0 leaves the pixel untouched
 *   srcstride  distance in bytes between rows of both src and srca
 *   dstbase    first destination byte, modified in place
 *   dststride  distance in bytes between destination rows
 *
 * With FAST_OSD the OSD value is copied unblended, two pixels per iteration.
 */
static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
  int y;
#if defined(FAST_OSD)
  // The FAST_OSD loop below handles two pixels per iteration.
  w=w>>1;
#endif
  for(y=0;y<h;y++){
    register int x;
    for(x=0;x<w;x++){
#ifdef FAST_OSD
      // The OSD value belongs in the odd bytes (offsets 1 and 3 of every
      // 4-byte pair of pixels), the same bytes the blending branch below
      // treats as luma.  Offsets 0 and 2 would overwrite the other component.
      if(srca[2*x+0]) dstbase[4*x+1]=src[2*x+0];
      if(srca[2*x+1]) dstbase[4*x+3]=src[2*x+1];
#else
      if(srca[x]) {
	dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x];
	// Scale the distance of the even byte from 128.
	dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128;
      }
#endif
    }
    src+=srcstride;
    srca+=srcstride;
    dstbase+=dststride;
  }
}

/*
 * Blend a grey OSD bitmap onto a surface with three bytes per pixel.
 *
 *   w, h       size of the area in pixels
 *   src        OSD values, one byte per pixel, added to all three bytes
 *   srca       OSD alpha, one byte per pixel
 *   srcstride  distance in bytes between rows of both src and srca
 *   dstbase    first destination byte, modified in place
 *   dststride  distance in bytes between destination rows
 *
 * Plain C path: for srca != 0 each of the three bytes becomes
 * ((dst * srca) >> 8) + src; with FAST_OSD the bytes are set to src.
 * x86 path without MMX: same blend done with integer asm.
 * MMX path: handles two pixels per iteration when either alpha is nonzero,
 * loading and storing 8 bytes and merging through mask24hl/mask24lh, which
 * are defined outside this template (presumably they keep the two bytes of
 * the following pixel intact -- verify against their definition).
 */
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef HAVE_MMX
    // mm7 = 0 (used for unpacking), mm6 = FF..FF (added to the alpha bytes).
    asm volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#endif
    for(y=0;y<h;y++){
        register unsigned char *dst = dstbase;
        register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
    asm volatile(
	PREFETCHW" %0\n\t"
	PREFETCH" %1\n\t"
	PREFETCH" %2\n\t"
	::"m"(*dst),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=2){
     if(srca[x] || srca[x+1]){
	asm volatile(
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq	%0, %%mm0\n\t" // dstbase
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm5\n\t"
		"punpcklbw %%mm7, %%mm0\n\t"
		"punpckhbw %%mm7, %%mm1\n\t"
		"movd	%1, %%mm2\n\t" // srca ABCD0000
		"paddb	%%mm6, %%mm2\n\t"
		"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
		"psrlq  $8, %%mm2\n\t" // srca AAABBBB0
		"movq	%%mm2, %%mm3\n\t"
		"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
		"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
		"pmullw	%%mm2, %%mm0\n\t"
		"pmullw	%%mm3, %%mm1\n\t"
		"psrlw	$8, %%mm0\n\t"
		"psrlw	$8, %%mm1\n\t"
		"packuswb %%mm1, %%mm0\n\t"
		"movd %2, %%mm2	\n\t" // src ABCD0000
		"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
		"psrlq  $8, %%mm2\n\t" // src AAABBBB0
		"paddb	%%mm2, %%mm0\n\t"
		"pand	%4, %%mm5\n\t"
		"pand	%3, %%mm0\n\t"
		"por	%%mm0, %%mm5\n\t"
		"movq	%%mm5, %0\n\t"
		:: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)
		// The asm stores 8 bytes to %0 although it is only listed as an
		// input, so the compiler has to be told that memory changes.
		: "memory");
     }
		// Advance by two pixels whether or not they were blended.
		dst += 6;
	}
#else /* HAVE_MMX */
    for(x=0;x<w;x++){
        if(srca[x]){
	    // %0 = pixel address, %1 = alpha, %2 = src << 8; the blended
	    // byte is taken from bits 8..15 of (dst * alpha + (src << 8)).
	    asm volatile(
		"movzbl (%0), %%ecx\n\t"
		"movzbl 1(%0), %%eax\n\t"

		"imull %1, %%ecx\n\t"
		"imull %1, %%eax\n\t"

		"addl %2, %%ecx\n\t"
		"addl %2, %%eax\n\t"

		"movb %%ch, (%0)\n\t"
		"movb %%ah, 1(%0)\n\t"

                "movzbl 2(%0), %%eax\n\t"
		"imull %1, %%eax\n\t"
		"addl %2, %%eax\n\t"
		"movb %%ah, 2(%0)\n\t"
		:
		:"D" (dst),
		 "r" ((unsigned)srca[x]),
		 "r" (((unsigned)src[x])<<8)
		// The asm writes through the pointer in %0, which the
		// compiler cannot see without the "memory" clobber.
		:"%eax", "%ecx", "memory"
		);
            }
	    dst += 3;
        }
#endif /* !HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
		dst[0]=dst[1]=dst[2]=src[x];
#else
		dst[0]=((dst[0]*srca[x])>>8)+src[x];
		dst[1]=((dst[1]*srca[x])>>8)+src[x];
		dst[2]=((dst[2]*srca[x])>>8)+src[x];
#endif
            }
            dst+=3; // 24bpp
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
	// Leave MMX state so that later floating point code works.
	asm volatile(EMMS:::"memory");
#endif
    return;
}

/*
 * Blend a grey OSD bitmap onto a surface with four bytes per pixel.
 *
 *   w, h       size of the area in pixels
 *   src        OSD values, one byte per pixel
 *   srca       OSD alpha, one byte per pixel
 *   srcstride  distance in bytes between rows of both src and srca
 *   dstbase    first destination byte, modified in place
 *   dststride  distance in bytes between destination rows
 *
 * With WORDS_BIGENDIAN the destination is shifted by one byte first.
 * Plain C path: for srca != 0 bytes 0..2 of the pixel become
 * ((dst * srca) >> 8) + src (or just src with FAST_OSD); byte 3 is untouched.
 * x86 path without MMX: same blend of bytes 0..2 done with integer asm.
 * 3DNow! path: two pixels per iteration when either alpha is nonzero.
 * Other MMX path: four pixels per iteration, skipped only when all four
 * alpha bytes are zero.  Both MMX variants process all four bytes of a pixel
 * and always handle whole groups.
 */
static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef WORDS_BIGENDIAN
    dstbase++;
#endif
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
    // mm7 = 0 (used for unpacking), mm6 = FF..FF (added to the alpha bytes).
    asm volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#else /* HAVE_3DNOW */
    // mm7 = 0, mm5 = FF00 in every word, mm4 = 00FF in every word.
    asm volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif /* HAVE_3DNOW */
#endif /* HAVE_MMX */
    for(y=0;y<h;y++){
        register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
    asm volatile(
	PREFETCHW" %0\n\t"
	PREFETCH" %1\n\t"
	PREFETCH" %2\n\t"
	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=2){
     if(srca[x] || srca[x+1])
	asm volatile(
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq	%0, %%mm0\n\t" // dstbase
		"movq	%%mm0, %%mm1\n\t"
		"punpcklbw %%mm7, %%mm0\n\t"
		"punpckhbw %%mm7, %%mm1\n\t"
		"movd	%1, %%mm2\n\t" // srca ABCD0000
		"paddb	%%mm6, %%mm2\n\t"
		"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
		"movq	%%mm2, %%mm3\n\t"
		"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
		"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
		"pmullw	%%mm2, %%mm0\n\t"
		"pmullw	%%mm3, %%mm1\n\t"
		"psrlw	$8, %%mm0\n\t"
		"psrlw	$8, %%mm1\n\t"
		"packuswb %%mm1, %%mm0\n\t"
		"movd %2, %%mm2	\n\t" // src ABCD0000
		"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
		"paddb	%%mm2, %%mm0\n\t"
		"movq	%%mm0, %0\n\t"
		:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
		// The asm stores 8 bytes to %0 although it is only listed as an
		// input, so the compiler has to be told that memory changes.
		: "memory");
	}
#else //this is faster for intels crap
    asm volatile(
	PREFETCHW" %0\n\t"
	PREFETCH" %1\n\t"
	PREFETCH" %2\n\t"
	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=4){
	asm volatile(
		// Skip the group if all 4 alpha bytes are zero.
		"movl %1, %%eax\n\t"
		"orl %%eax, %%eax\n\t"
		" jz 1f\n\t"
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		// first two pixels
		"movq	%0, %%mm0\n\t" // dstbase
		"movq	%%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" 	//0R0B0R0B
		"psrlw $8, %%mm1\n\t"		//0?0G0?0G
		"movd	%%eax, %%mm2\n\t" 	//srca 0000DCBA
		"paddb	"MANGLE(bFF)", %%mm2\n\t"
		"punpcklbw %%mm2, %%mm2\n\t"	//srca DDCCBBAA
		"movq %%mm2, %%mm3\n\t"
		"punpcklbw %%mm7, %%mm2\n\t"	//srca 0B0B0A0A
		"pmullw	%%mm2, %%mm0\n\t"
		"pmullw	%%mm2, %%mm1\n\t"
		"psrlw	$8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"por %%mm1, %%mm0\n\t"
		"movd %2, %%mm2	\n\t"		//src 0000DCBA
		"punpcklbw %%mm2, %%mm2\n\t" 	//src DDCCBBAA
		"movq %%mm2, %%mm6\n\t"
		"punpcklbw %%mm2, %%mm2\n\t"	//src BBBBAAAA
		"paddb	%%mm2, %%mm0\n\t"
		"movq	%%mm0, %0\n\t"

		// last two pixels
		"movq	8%0, %%mm0\n\t" // dstbase
		"movq	%%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" 	//0R0B0R0B
		"psrlw $8, %%mm1\n\t"		//0?0G0?0G
		"punpckhbw %%mm7, %%mm3\n\t"	//srca 0D0D0C0C
		"pmullw	%%mm3, %%mm0\n\t"
		"pmullw	%%mm3, %%mm1\n\t"
		"psrlw	$8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"por %%mm1, %%mm0\n\t"
		"punpckhbw %%mm6, %%mm6\n\t"	//src DDDDCCCC
		"paddb	%%mm6, %%mm0\n\t"
		"movq	%%mm0, 8%0\n\t"
		"1:\n\t"
		:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
		// The asm stores 16 bytes starting at %0 although it is only
		// listed as an input, so the compiler has to be told that
		// memory changes.
		: "%eax", "memory");
	}
#endif
#else /* HAVE_MMX */
    for(x=0;x<w;x++){
        if(srca[x]){
	    // %0 = pixel address, %1 = alpha, %2 = src << 8; the blended
	    // byte is taken from bits 8..15 of (dst * alpha + (src << 8)).
	    asm volatile(
		"movzbl (%0), %%ecx\n\t"
		"movzbl 1(%0), %%eax\n\t"
		"movzbl 2(%0), %%edx\n\t"

		"imull %1, %%ecx\n\t"
		"imull %1, %%eax\n\t"
		"imull %1, %%edx\n\t"

 		"addl %2, %%ecx\n\t"
		"addl %2, %%eax\n\t"
		"addl %2, %%edx\n\t"

		"movb %%ch, (%0)\n\t"
		"movb %%ah, 1(%0)\n\t"
		"movb %%dh, 2(%0)\n\t"

		:
		:"r" (&dstbase[4*x]),
		 "r" ((unsigned)srca[x]),
		 "r" (((unsigned)src[x])<<8)
		// The asm writes through the pointer in %0, which the
		// compiler cannot see without the "memory" clobber.
		:"%eax", "%ecx", "%edx", "memory"
		);
            }
        }
#endif /* HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
		dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
		dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
		dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
		dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
            }
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
	// Leave MMX state so that later floating point code works.
	asm volatile(EMMS:::"memory");
#endif
    return;
}