view libmpeg2/cpu_accel.c @ 37195:ac6c37d85d65 default tip

configure: Fix initialization of variable def_local_aligned_32 It contiained the #define of HAVE_LOCAL_ALIGNED_16 instead of HAVE_LOCAL_ALIGNED_32.
author al
date Sun, 28 Sep 2014 18:38:41 +0000
parents e83eef58b30a
children
line wrap: on
line source

/*
 * cpu_accel.c
 * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Modified for use with MPlayer, see libmpeg2_changes.diff for the exact changes.
 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
 * $Id$
 */

#include "config.h"

#include <inttypes.h>

#include "mpeg2.h"
#include "attributes.h"
#include "mpeg2_internal.h"

#include "cpudetect.h"

#if ARCH_X86 || ARCH_X86_64
static inline uint32_t arch_accel (uint32_t accel)
{
/* Use MPlayer CPU detection instead of libmpeg2 variant. */
#if 0
    if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
	accel |= MPEG2_ACCEL_X86_MMX;

    if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
	accel |= MPEG2_ACCEL_X86_MMXEXT;

    if (accel & (MPEG2_ACCEL_X86_SSE3))
	accel |= MPEG2_ACCEL_X86_SSE2;

#ifdef ACCEL_DETECT
    if (accel & MPEG2_ACCEL_DETECT) {
	uint32_t eax, ebx, ecx, edx;
	int AMD;

#if defined(__x86_64__) || (!defined(PIC) && !defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)	\
    __asm__ ("cpuid"			\
	     : "=a" (eax),		\
	       "=b" (ebx),		\
	       "=c" (ecx),		\
	       "=d" (edx)		\
	     : "a" (op)			\
	     : "cc")
#else	/* PIC version : save ebx (not needed on x86_64) */
#define cpuid(op,eax,ebx,ecx,edx)	\
    __asm__ ("pushl %%ebx\n\t"		\
	     "cpuid\n\t"		\
	     "movl %%ebx,%1\n\t"	\
	     "popl %%ebx"		\
	     : "=a" (eax),		\
	       "=r" (ebx),		\
	       "=c" (ecx),		\
	       "=d" (edx)		\
	     : "a" (op)			\
	     : "cc")
#endif

#ifndef __x86_64__ /* x86_64 supports the cpuid op */
	__asm__ ("pushf\n\t"
		 "pushf\n\t"
		 "pop %0\n\t"
		 "movl %0,%1\n\t"
		 "xorl $0x200000,%0\n\t"
		 "push %0\n\t"
		 "popf\n\t"
		 "pushf\n\t"
		 "pop %0\n\t"
		 "popf"
		 : "=r" (eax),
		 "=r" (ebx)
		 :
		 : "cc");

	if (eax == ebx)			/* no cpuid */
	    return accel;
#endif

	cpuid (0x00000000, eax, ebx, ecx, edx);
	if (!eax)			/* vendor string only */
	    return accel;

	AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);

	cpuid (0x00000001, eax, ebx, ecx, edx);
	if (! (edx & 0x00800000))	/* no MMX */
	    return accel;

	accel |= MPEG2_ACCEL_X86_MMX;
	if (edx & 0x02000000)		/* SSE - identical to AMD MMX ext. */
	    accel |= MPEG2_ACCEL_X86_MMXEXT;

	if (edx & 0x04000000)		/* SSE2 */
	    accel |= MPEG2_ACCEL_X86_SSE2;

	if (ecx & 0x00000001)		/* SSE3 */
	    accel |= MPEG2_ACCEL_X86_SSE3;

	cpuid (0x80000000, eax, ebx, ecx, edx);
	if (eax < 0x80000001)		/* no extended capabilities */
	    return accel;

	cpuid (0x80000001, eax, ebx, ecx, edx);

	if (edx & 0x80000000)
	    accel |= MPEG2_ACCEL_X86_3DNOW;

	if (AMD && (edx & 0x00400000))	/* AMD MMX extensions */
	    accel |= MPEG2_ACCEL_X86_MMXEXT;
    }
#endif /* ACCEL_DETECT */

    return accel;

#else /* 0 */
    accel = 0;
    if (gCpuCaps.hasMMX)
        accel |= MPEG2_ACCEL_X86_MMX;
    if (gCpuCaps.hasSSE2)
	accel |= MPEG2_ACCEL_X86_SSE2;
    if (gCpuCaps.hasMMX2)
	accel |= MPEG2_ACCEL_X86_MMXEXT;
    if (gCpuCaps.has3DNow)
	accel |= MPEG2_ACCEL_X86_3DNOW;

    return accel;

#endif /* 0 */
}
#endif /* ARCH_X86 || ARCH_X86_64 */

#if defined(ACCEL_DETECT) && (ARCH_PPC || ARCH_SPARC)
#include <signal.h>
#include <setjmp.h>

static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;

static RETSIGTYPE sigill_handler (int sig)
{
    if (!canjump) {
	signal (sig, SIG_DFL);
	raise (sig);
    }

    canjump = 0;
    siglongjmp (jmpbuf, 1);
}
#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */

#if ARCH_PPC
static uint32_t arch_accel (uint32_t accel)
{
#ifdef ACCEL_DETECT
    if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) ==
	MPEG2_ACCEL_DETECT) {
	static RETSIGTYPE (* oldsig) (int);

	oldsig = signal (SIGILL, sigill_handler);
	if (sigsetjmp (jmpbuf, 1)) {
	    signal (SIGILL, oldsig);
	    return accel;
	}

	canjump = 1;

#if defined(__APPLE_CC__)	/* apple */
#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
#else				/* gnu */
#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
#endif
	asm volatile ("mtspr 256, %0\n\t"
		      VAND (0, 0, 0)
		      :
		      : "r" (-1));

	canjump = 0;
	accel |= MPEG2_ACCEL_PPC_ALTIVEC;

	signal (SIGILL, oldsig);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_PPC */

#if ARCH_SPARC
static uint32_t arch_accel (uint32_t accel)
{
    if (accel & MPEG2_ACCEL_SPARC_VIS2)
	accel |= MPEG2_ACCEL_SPARC_VIS;

#ifdef ACCEL_DETECT
    if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) ==
	MPEG2_ACCEL_DETECT) {
	static RETSIGTYPE (* oldsig) (int);

	oldsig = signal (SIGILL, sigill_handler);
	if (sigsetjmp (jmpbuf, 1)) {
	    signal (SIGILL, oldsig);
	    return accel;
	}

	canjump = 1;

	/* pdist %f0, %f0, %f0 */
	__asm__ __volatile__(".word\t0x81b007c0");

	canjump = 0;
	accel |= MPEG2_ACCEL_SPARC_VIS;

	if (sigsetjmp (jmpbuf, 1)) {
	    signal (SIGILL, oldsig);
	    return accel;
	}

	canjump = 1;

	/* edge8n %g0, %g0, %g0 */
	__asm__ __volatile__(".word\t0x81b00020");

	canjump = 0;
	accel |= MPEG2_ACCEL_SPARC_VIS2;

	signal (SIGILL, oldsig);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_SPARC */

#if ARCH_ALPHA
static inline uint32_t arch_accel (uint32_t accel)
{
    if (accel & MPEG2_ACCEL_ALPHA_MVI)
	accel |= MPEG2_ACCEL_ALPHA;

#ifdef ACCEL_DETECT
    if (accel & MPEG2_ACCEL_DETECT) {
	uint64_t no_mvi;

	asm volatile ("amask %1, %0"
		      : "=r" (no_mvi)
		      : "rI" (256));	/* AMASK_MVI */
	accel |= no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
					       MPEG2_ACCEL_ALPHA_MVI);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_ALPHA */

uint32_t mpeg2_detect_accel (uint32_t accel)
{
#if ARCH_X86 || ARCH_X86_64 || ARCH_PPC || ARCH_ALPHA || ARCH_SPARC
    accel = arch_accel (accel);
#endif
    return accel;
}