view cpudetect.c @ 29214:a1abd8d51b81

Change VOFW for x86 to 5120, it allows larger images to be scaled and was not slower. Other archs are not changed as the larger VOFW was slower on PPC.
author michael
date Tue, 05 May 2009 01:34:16 +0000
parents 06540eb5ef6a
children 0f1b5b68af32
line wrap: on
line source

#include "config.h"
#include "cpudetect.h"
#include "mp_msg.h"

CpuCaps gCpuCaps;

#include <stdlib.h>

#if ARCH_X86

#include <stdio.h>
#include <string.h>

#if defined (__NetBSD__) || defined(__OpenBSD__)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
#elif defined(__linux__)
#include <signal.h>
#elif defined(__MINGW32__) || defined(__CYGWIN__)
#include <windows.h>
#elif defined(__OS2__)
#define INCL_DOS
#include <os2.h>
#elif defined(__AMIGAOS4__)
#include <proto/exec.h>
#endif

/* Thanks to the FreeBSD project for some of this cpuid code, and 
 * help understanding how to use it.  Thanks to the Mesa 
 * team for SSE support detection and more cpu detect code.
 */

/* I believe this code works.  However, it has only been used on a PII and PIII */

static void check_os_katmai_support( void );

// return TRUE if cpuid supported
static int has_cpuid(void)
{
// code from libavcodec:
#if ARCH_X86_64
   return 1;
#else
	long a, c;
    __asm__ volatile (
                          /* See if CPUID instruction is supported ... */
                          /* ... Get copies of EFLAGS into eax and ecx */
                          "pushfl\n\t"
                          "pop %0\n\t"
                          "mov %0, %1\n\t"
                          
                          /* ... Toggle the ID bit in one copy and store */
                          /*     to the EFLAGS reg */
                          "xor $0x200000, %0\n\t"
                          "push %0\n\t"
                          "popfl\n\t"
                          
                          /* ... Get the (hopefully modified) EFLAGS */
                          "pushfl\n\t"
                          "pop %0\n\t"
                          : "=a" (a), "=c" (c)
                          :
                          : "cc" 
                          );

	return a != c;
#endif
}

static void
do_cpuid(unsigned int ax, unsigned int *p)
{
#if 0
	__asm__ volatile(
	"cpuid;"
	: "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
	:  "0" (ax)
	);
#else
// code from libavcodec:
    __asm__ volatile
	("mov %%"REG_b", %%"REG_S"\n\t"
         "cpuid\n\t"
         "xchg %%"REG_b", %%"REG_S
         : "=a" (p[0]), "=S" (p[1]), 
           "=c" (p[2]), "=d" (p[3])
         : "0" (ax));
#endif

}

void GetCpuCaps( CpuCaps *caps)
{
	unsigned int regs[4];
	unsigned int regs2[4];

	memset(caps, 0, sizeof(*caps));
	caps->isX86=1;
	caps->cl_size=32; /* default */
	if (!has_cpuid()) {
	    mp_msg(MSGT_CPUDETECT,MSGL_WARN,"CPUID not supported!??? (maybe an old 486?)\n");
	    return;
	}
	do_cpuid(0x00000000, regs); // get _max_ cpuid level and vendor name
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU vendor name: %.4s%.4s%.4s  max cpuid level: %d\n",
			(char*) (regs+1),(char*) (regs+3),(char*) (regs+2), regs[0]);
	if (regs[0]>=0x00000001)
	{
		char *tmpstr, *ptmpstr;
		unsigned cl_size;

		do_cpuid(0x00000001, regs2);

		caps->cpuType=(regs2[0] >> 8)&0xf;
		caps->cpuModel=(regs2[0] >> 4)&0xf;

// see AMD64 Architecture Programmer's Manual, Volume 3: General-purpose and
// System Instructions, Table 3-2: Effective family computation, page 120.
		if(caps->cpuType==0xf){
		    // use extended family (P4, IA64, K8)
		    caps->cpuType=0xf+((regs2[0]>>20)&255);
		}
		if(caps->cpuType==0xf || caps->cpuType==6)
		    caps->cpuModel |= ((regs2[0]>>16)&0xf) << 4;

		caps->cpuStepping=regs2[0] & 0xf;

		// general feature flags:
		caps->hasTSC  = (regs2[3] & (1 << 8  )) >>  8; // 0x0000010
		caps->hasMMX  = (regs2[3] & (1 << 23 )) >> 23; // 0x0800000
		caps->hasSSE  = (regs2[3] & (1 << 25 )) >> 25; // 0x2000000
		caps->hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; // 0x4000000
		caps->hasSSE3 = (regs2[2] & 1);                // 0x0000001
		caps->hasSSSE3 = (regs2[2] & (1 << 9 )) >>  9; // 0x0000200
		caps->hasMMX2 = caps->hasSSE; // SSE cpus supports mmxext too
		cl_size = ((regs2[1] >> 8) & 0xFF)*8;
		if(cl_size) caps->cl_size = cl_size;

		ptmpstr=tmpstr=GetCpuFriendlyName(regs, regs2);
		while(*ptmpstr == ' ')        // strip leading spaces
		    ptmpstr++;
		mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: %s ", ptmpstr);
		free(tmpstr);
		mp_msg(MSGT_CPUDETECT,MSGL_V,"(Family: %d, Model: %d, Stepping: %d)\n",
		    caps->cpuType, caps->cpuModel, caps->cpuStepping);

	}
	do_cpuid(0x80000000, regs);
	if (regs[0]>=0x80000001) {
		mp_msg(MSGT_CPUDETECT,MSGL_V,"extended cpuid-level: %d\n",regs[0]&0x7FFFFFFF);
		do_cpuid(0x80000001, regs2);
		caps->hasMMX  |= (regs2[3] & (1 << 23 )) >> 23; // 0x0800000
		caps->hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; // 0x400000
		caps->has3DNow    = (regs2[3] & (1 << 31 )) >> 31; //0x80000000
		caps->has3DNowExt = (regs2[3] & (1 << 30 )) >> 30;
		caps->hasSSE4a = (regs2[2] & (1 << 6 )) >>  6; // 0x0000040
	}
	if(regs[0]>=0x80000006)
	{
		do_cpuid(0x80000006, regs2);
		mp_msg(MSGT_CPUDETECT,MSGL_V,"extended cache-info: %d\n",regs2[2]&0x7FFFFFFF);
		caps->cl_size  = regs2[2] & 0xFF;
	}
	mp_msg(MSGT_CPUDETECT,MSGL_V,"Detected cache-line size is %u bytes\n",caps->cl_size);
#if 0
	mp_msg(MSGT_CPUDETECT,MSGL_INFO,"cpudetect: MMX=%d MMX2=%d SSE=%d SSE2=%d 3DNow=%d 3DNowExt=%d\n",
		gCpuCaps.hasMMX,
		gCpuCaps.hasMMX2,
		gCpuCaps.hasSSE,
		gCpuCaps.hasSSE2,
		gCpuCaps.has3DNow,
		gCpuCaps.has3DNowExt );
#endif

		/* FIXME: Does SSE2 need more OS support, too? */
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
  || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
  || defined(__APPLE__) || defined(__CYGWIN__) || defined(__MINGW32__) \
  || defined(__OS2__)
		if (caps->hasSSE)
			check_os_katmai_support();
		if (!caps->hasSSE)
			caps->hasSSE2 = 0;
#else
		caps->hasSSE=0;
		caps->hasSSE2 = 0;
#endif
//		caps->has3DNow=1;
//		caps->hasMMX2 = 0;
//		caps->hasMMX = 0;

#if !CONFIG_RUNTIME_CPUDETECT
#if !HAVE_MMX
	if(caps->hasMMX) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"MMX supported but disabled\n");
	caps->hasMMX=0;
#endif
#if !HAVE_MMX2
	if(caps->hasMMX2) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"MMX2 supported but disabled\n");
	caps->hasMMX2=0;
#endif
#if !HAVE_SSE
	if(caps->hasSSE) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"SSE supported but disabled\n");
	caps->hasSSE=0;
#endif
#if !HAVE_SSE2
	if(caps->hasSSE2) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"SSE2 supported but disabled\n");
	caps->hasSSE2=0;
#endif
#if !HAVE_AMD3DNOW
	if(caps->has3DNow) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"3DNow supported but disabled\n");
	caps->has3DNow=0;
#endif
#if !HAVE_AMD3DNOWEXT
	if(caps->has3DNowExt) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"3DNowExt supported but disabled\n");
	caps->has3DNowExt=0;
#endif
#endif  // CONFIG_RUNTIME_CPUDETECT
}

char *GetCpuFriendlyName(unsigned int regs[], unsigned int regs2[]){
	char vendor[13];
	char *retname;
	int i;

	if (NULL==(retname=malloc(256))) {
		mp_msg(MSGT_CPUDETECT,MSGL_FATAL,"Error: GetCpuFriendlyName() not enough memory\n");
		exit(1);
	}
	retname[0] = '\0';

	sprintf(vendor,"%.4s%.4s%.4s",(char*)(regs+1),(char*)(regs+3),(char*)(regs+2));

	do_cpuid(0x80000000,regs);
	if (regs[0] >= 0x80000004)
	{
		// CPU has built-in namestring
		for (i = 0x80000002; i <= 0x80000004; i++)
		{
			do_cpuid(i, regs);
			strncat(retname, (char*)regs, 16);
		}
	}
	return retname;
}

#if defined(__linux__) && defined(_POSIX_SOURCE) && !ARCH_X86_64
static void sigill_handler_sse( int signal, struct sigcontext sc )
{
   mp_msg(MSGT_CPUDETECT,MSGL_V, "SIGILL, " );

   /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
    * instructions are 3 bytes long.  We must increment the instruction
    * pointer manually to avoid repeated execution of the offending
    * instruction.
    *
    * If the SIGILL is caused by a divide-by-zero when unmasked
    * exceptions aren't supported, the SIMD FPU status and control
    * word will be restored at the end of the test, so we don't need
    * to worry about doing it here.  Besides, we may not be able to...
    */
   sc.eip += 3;

   gCpuCaps.hasSSE=0;
}
#endif /* __linux__ && _POSIX_SOURCE */

#if (defined(__MINGW32__) || defined(__CYGWIN__)) && !ARCH_X86_64
LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
{
   if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
      mp_msg(MSGT_CPUDETECT,MSGL_V, "SIGILL, " );
      ep->ContextRecord->Eip +=3;
      gCpuCaps.hasSSE=0;       
	  return EXCEPTION_CONTINUE_EXECUTION;
   }
   return EXCEPTION_CONTINUE_SEARCH;
}
#endif /* defined(__MINGW32__) || defined(__CYGWIN__) */

#ifdef __OS2__
ULONG _System os2_sig_handler_sse( PEXCEPTIONREPORTRECORD       p1,
                                   PEXCEPTIONREGISTRATIONRECORD p2,
                                   PCONTEXTRECORD               p3,
                                   PVOID                        p4 )
{
   if(p1->ExceptionNum == XCPT_ILLEGAL_INSTRUCTION){
      mp_msg(MSGT_CPUDETECT, MSGL_V, "SIGILL, ");

      p3->ctx_RegEip += 3;
      gCpuCaps.hasSSE = 0;

      return XCPT_CONTINUE_EXECUTION;
   }
   return XCPT_CONTINUE_SEARCH;
}
#endif

/* If we're running on a processor that can do SSE, let's see if we
 * are allowed to or not.  This will catch 2.4.0 or later kernels that
 * haven't been configured for a Pentium III but are running on one,
 * and RedHat patched 2.2 kernels that have broken exception handling
 * support for user space apps that do SSE.
 */
 
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
#define SSE_SYSCTL_NAME "hw.instruction_sse"
#elif defined(__APPLE__)
#define SSE_SYSCTL_NAME "hw.optional.sse"
#endif

static void check_os_katmai_support( void )
{
#if ARCH_X86_64
   gCpuCaps.hasSSE=1;
   gCpuCaps.hasSSE2=1;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
   int has_sse=0, ret;
   size_t len=sizeof(has_sse);

   ret = sysctlbyname(SSE_SYSCTL_NAME, &has_sse, &len, NULL, 0);
   if (ret || !has_sse)
      gCpuCaps.hasSSE=0;

#elif defined(__NetBSD__) || defined (__OpenBSD__)
#if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
   int has_sse, has_sse2, ret, mib[2];
   size_t varlen;

   mib[0] = CTL_MACHDEP;
   mib[1] = CPU_SSE;
   varlen = sizeof(has_sse);

   mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
   ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
   gCpuCaps.hasSSE = ret >= 0 && has_sse;
   mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );

   mib[1] = CPU_SSE2;
   varlen = sizeof(has_sse2);
   mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE2... " );
   ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
   gCpuCaps.hasSSE2 = ret >= 0 && has_sse2;
   mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE2 ? "yes.\n" : "no!\n" );
#else
   gCpuCaps.hasSSE = 0;
   mp_msg(MSGT_CPUDETECT,MSGL_WARN, "No OS support for SSE, disabling to be safe.\n" );
#endif
#elif defined(__MINGW32__) || defined(__CYGWIN__)
   LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
   if ( gCpuCaps.hasSSE ) {
      mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
      exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
      __asm__ volatile ("xorps %xmm0, %xmm0");
      SetUnhandledExceptionFilter(exc_fil);
      mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
   }
#elif defined(__OS2__)
   EXCEPTIONREGISTRATIONRECORD RegRec = { 0, &os2_sig_handler_sse };
   if ( gCpuCaps.hasSSE ) {
      mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
      DosSetExceptionHandler( &RegRec );
      __asm__ volatile ("xorps %xmm0, %xmm0");
      DosUnsetExceptionHandler( &RegRec );
      mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
   }
#elif defined(__linux__)
#if defined(_POSIX_SOURCE)
   struct sigaction saved_sigill;

   /* Save the original signal handlers.
    */
   sigaction( SIGILL, NULL, &saved_sigill );

   signal( SIGILL, (void (*)(int))sigill_handler_sse );

   /* Emulate test for OSFXSR in CR4.  The OS will set this bit if it
    * supports the extended FPU save and restore required for SSE.  If
    * we execute an SSE instruction on a PIII and get a SIGILL, the OS
    * doesn't support Streaming SIMD Exceptions, even if the processor
    * does.
    */
   if ( gCpuCaps.hasSSE ) {
      mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );

//      __asm__ volatile ("xorps %%xmm0, %%xmm0");
      __asm__ volatile ("xorps %xmm0, %xmm0");

      mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
   }

   /* Restore the original signal handlers.
    */
   sigaction( SIGILL, &saved_sigill, NULL );

   /* If we've gotten to here and the XMM CPUID bit is still set, we're
    * safe to go ahead and hook out the SSE code throughout Mesa.
    */
   mp_msg(MSGT_CPUDETECT,MSGL_V, "Tests of OS support for SSE %s\n", gCpuCaps.hasSSE ? "passed." : "failed!" );
#else
   /* We can't use POSIX signal handling to test the availability of
    * SSE, so we disable it by default.
    */
   mp_msg(MSGT_CPUDETECT,MSGL_WARN, "Cannot test OS support for SSE, disabling to be safe.\n" );
   gCpuCaps.hasSSE=0;
#endif /* _POSIX_SOURCE */
#else
   /* Do nothing on other platforms for now.
    */
   mp_msg(MSGT_CPUDETECT,MSGL_WARN, "Cannot test OS support for SSE, leaving disabled.\n" );
   gCpuCaps.hasSSE=0;
#endif /* __linux__ */
}
#else /* ARCH_X86 */

#ifdef __APPLE__
#include <sys/sysctl.h>
#elif defined(__AMIGAOS4__)
/* nothing */
#else
#include <signal.h>
#include <setjmp.h>

static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;

static void sigill_handler (int sig)
{
    if (!canjump) {
        signal (sig, SIG_DFL);
        raise (sig);
    }
    
    canjump = 0;
    siglongjmp (jmpbuf, 1);
}
#endif /* __APPLE__ */

void GetCpuCaps( CpuCaps *caps)
{
	caps->cpuType=0;
	caps->cpuModel=0;
	caps->cpuStepping=0;
	caps->hasMMX=0;
	caps->hasMMX2=0;
	caps->has3DNow=0;
	caps->has3DNowExt=0;
	caps->hasSSE=0;
	caps->hasSSE2=0;
	caps->hasSSE3=0;
	caps->hasSSSE3=0;
	caps->hasSSE4a=0;
	caps->isX86=0;
	caps->hasAltiVec = 0;
#if HAVE_ALTIVEC   
#ifdef __APPLE__
/*
  rip-off from ffmpeg altivec detection code.
  this code also appears on Apple's AltiVec pages.
 */
        {
                int sels[2] = {CTL_HW, HW_VECTORUNIT};
                int has_vu = 0;
                size_t len = sizeof(has_vu);
                int err;

                err = sysctl(sels, 2, &has_vu, &len, NULL, 0);   

                if (err == 0)
                        if (has_vu != 0)
                                caps->hasAltiVec = 1;
        }
#elif defined(__AMIGAOS4__)
        ULONG result = 0;

        GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
        if (result == VECTORTYPE_ALTIVEC)
        	caps->hasAltiVec = 1;
#else
/* no Darwin, do it the brute-force way */
/* this is borrowed from the libmpeg2 library */
        {
          signal (SIGILL, sigill_handler);
          if (sigsetjmp (jmpbuf, 1)) {
            signal (SIGILL, SIG_DFL);
          } else {
            canjump = 1;
            
            __asm__ volatile ("mtspr 256, %0\n\t"
                          "vand %%v0, %%v0, %%v0"
                          :
                          : "r" (-1));
            
            signal (SIGILL, SIG_DFL);
            caps->hasAltiVec = 1;
          }
        }
#endif /* __APPLE__ */
        mp_msg(MSGT_CPUDETECT,MSGL_V,"AltiVec %sfound\n", (caps->hasAltiVec ? "" : "not "));
#endif /* HAVE_ALTIVEC */

if (ARCH_IA64)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: Intel Itanium\n");

if (ARCH_SPARC)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: Sun Sparc\n");

if (ARCH_ARM)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: ARM\n");

if (ARCH_PPC)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: PowerPC\n");

if (ARCH_ALPHA)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: Digital Alpha\n");

if (ARCH_SGI_MIPS)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: SGI MIPS\n");

if (ARCH_PA_RISC)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: Hewlett-Packard PA-RISC\n");

if (ARCH_S390)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: IBM S/390\n");

if (ARCH_S390X)
	mp_msg(MSGT_CPUDETECT,MSGL_V,"CPU: IBM S/390X\n");

if (ARCH_VAX)
	mp_msg(MSGT_CPUDETECT,MSGL_V, "CPU: Digital VAX\n" );

if (ARCH_XTENSA)
	mp_msg(MSGT_CPUDETECT,MSGL_V, "CPU: Tensilica Xtensa\n" );
}
#endif /* !ARCH_X86 */