changeset 35101:3e1a4184b70e

Add SSE3, SSE4 (i.e. SSE4.1), SSE4.2 and AVX detection. Patch by Xidorn Quan, quanxunzhen gmail
author cehoyos
date Fri, 14 Sep 2012 14:16:08 +0000
parents bc7b006732c0
children d0909b13984f
files configure cpudetect.c cpudetect.h
diffstat 3 files changed, 39 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/configure	Fri Sep 14 14:13:54 2012 +0000
+++ b/configure	Fri Sep 14 14:16:08 2012 +0000
@@ -581,7 +581,11 @@
   --enable-3dnowext         enable extended 3DNow! [autodetect]
   --enable-sse              enable SSE [autodetect]
   --enable-sse2             enable SSE2 [autodetect]
+  --enable-sse3             enable SSE3 [autodetect]
   --enable-ssse3            enable SSSE3 [autodetect]
+  --enable-sse4             enable SSE4 [autodetect]
+  --enable-sse42            enable SSE4.2 [autodetect]
+  --enable-avx              enable AVX [autodetect]
   --enable-shm              enable shm [autodetect]
   --enable-altivec          enable AltiVec (PowerPC) [autodetect]
   --enable-armv5te          enable DSP extensions (ARM) [autodetect]
@@ -634,7 +638,11 @@
 _mmxext=auto
 _sse=auto
 _sse2=auto
+_sse3=auto
 _ssse3=auto
+_sse4_1=auto
+_sse4_2=auto
+_avx=auto
 _cmov=auto
 _fast_cmov=auto
 _fast_clz=auto
@@ -1441,8 +1449,16 @@
   --disable-sse) _sse=no ;;
   --enable-sse2) _sse2=yes ;;
   --disable-sse2) _sse2=no ;;
+  --enable-sse3) _sse3=yes ;;
+  --disable-sse3) _sse3=no ;;
   --enable-ssse3) _ssse3=yes ;;
   --disable-ssse3) _ssse3=no ;;
+  --enable-sse4) _sse4_1=yes;;
+  --disable-sse4) _sse4_1=no;;
+  --enable-sse42) _sse4_2=yes;;
+  --disable-sse42) _sse4_2=no;;
+  --enable-avx) _avx=yes;;
+  --disable-avx) _avx=no;;
   --enable-mmxext) _mmxext=yes ;;
   --disable-mmxext) _mmxext=no ;;
   --enable-3dnow) _3dnow=yes ;;
@@ -1816,7 +1832,7 @@
   exts=$($_cpuinfo | egrep 'features|flags' | cut -d ':' -f 2 | head -n 1)
 
   pparam=$(echo $exts | sed -e s/k6_mtrr/mtrr/ -e s/cyrix_arr/mtrr/ -e s/centaur_mcr/mtrr/ \
-                            -e s/xmm/sse/ -e s/kni/sse/)
+                            -e s/xmm/sse/ -e s/kni/sse/ -e s/pni/sse3/)
   # SSE implies MMX2, but not all SSE processors report the mmxext CPU flag.
   pparam=$(echo $pparam | sed -e 's/sse/sse mmxext/')
 
@@ -1869,7 +1885,11 @@
   extcheck $_3dnowext "3dnowext" "pswapd %%mm0, %%mm0"
   extcheck $_sse      "sse"      "xorps %%xmm0, %%xmm0" || _gcc3_ext="$_gcc3_ext -mno-sse"
   extcheck $_sse2     "sse2"     "xorpd %%xmm0, %%xmm0" || _gcc3_ext="$_gcc3_ext -mno-sse2"
+  extcheck $_sse3     "sse3"     "addsubps %%xmm0, %%xmm0"
   extcheck $_ssse3    "ssse3"    "pabsd %%xmm0, %%xmm0"
+  extcheck $_sse4_1   "sse4_1"   "pmaxsb %%xmm0, %%xmm0"
+  extcheck $_sse4_2   "sse4_2"   "pcmpgtq %%xmm0, %%xmm0"
+  extcheck $_avx      "avx"      "vpabsw %%xmm0, %%xmm0"
   extcheck $_cmov     "cmov"     "cmovb %%eax,  %%ebx"
 
   echocheck "mtrr support"
@@ -2521,7 +2541,11 @@
     test "$_mmxext"   != no && _mmxext=yes
     test "$_sse"      != no && _sse=yes
     test "$_sse2"     != no && _sse2=yes
+    test "$_sse3"     != no && _sse3=yes
     test "$_ssse3"    != no && _ssse3=yes
+    test "$_sse4_1"   != no && _sse4_1=yes
+    test "$_sse4_2"   != no && _sse4_2=yes
+    test "$_avx"      != no && _avx=yes
     test "$_mtrr"     != no && _mtrr=yes
   fi
   if ppc; then
@@ -3003,7 +3027,7 @@
   echores "$_iwmmxt"
 fi
 
-cpuexts_all='ALTIVEC AVX MMX MMX2 MMXEXT AMD3DNOW AMD3DNOWEXT SSE SSE2 SSE3 SSSE3 SSE4 FAST_CMOV CMOV FAST_CLZ ARMV5TE ARMV6 ARMV6T2 ARMVFP VFPV3 NEON IWMMXT MMI VIS MVI'
+cpuexts_all='ALTIVEC AVX MMX MMX2 MMXEXT AMD3DNOW AMD3DNOWEXT SSE SSE2 SSE3 SSSE3 SSE4 SSE42 FAST_CMOV CMOV FAST_CLZ ARMV5TE ARMV6 ARMV6T2 ARMVFP VFPV3 NEON IWMMXT MMI VIS MVI'
 test "$_altivec"   = yes && cpuexts="ALTIVEC $cpuexts"
 test "$_mmx"       = yes && cpuexts="MMX $cpuexts"
 test "$_mmxext"    = yes && cpuexts="MMX2 $cpuexts"
@@ -3012,7 +3036,11 @@
 test "$_3dnowext"  = yes && cpuexts="AMD3DNOWEXT $cpuexts"
 test "$_sse"       = yes && cpuexts="SSE $cpuexts"
 test "$_sse2"      = yes && cpuexts="SSE2 $cpuexts"
+test "$_sse3"      = yes && cpuexts="SSE3 $cpuexts"
 test "$_ssse3"     = yes && cpuexts="SSSE3 $cpuexts"
+test "$_sse4_1"    = yes && cpuexts="SSE4 $cpuexts"
+test "$_sse4_2"    = yes && cpuexts="SSE42 $cpuexts"
+test "$_avx"       = yes && cpuexts="AVX $cpuexts"
 test "$_cmov"      = yes && cpuexts="CMOV $cpuexts"
 test "$_fast_cmov" = yes && cpuexts="FAST_CMOV $cpuexts"
 test "$_fast_clz"  = yes && cpuexts="FAST_CLZ $cpuexts"
--- a/cpudetect.c	Fri Sep 14 14:13:54 2012 +0000
+++ b/cpudetect.c	Fri Sep 14 14:16:08 2012 +0000
@@ -315,6 +315,9 @@
         caps->hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; // 0x4000000
         caps->hasSSE3 = (regs2[2] & 1);        // 0x0000001
         caps->hasSSSE3 = (regs2[2] & (1 << 9 )) >>  9; // 0x0000200
+        caps->hasSSE4 = (regs2[2] & (1 << 19 )) >> 19; // 0x0080000
+        caps->hasSSE42 = (regs2[2] & (1 << 20)) >> 20; // 0x0100000
+        caps->hasAVX  = (regs2[2] & (1 << 28 )) >> 28; // 0x10000000
         caps->hasMMX2 = caps->hasSSE; // SSE cpus supports mmxext too
         cl_size = ((regs2[1] >> 8) & 0xFF)*8;
         if(cl_size) caps->cl_size = cl_size;
@@ -457,7 +460,10 @@
     caps->hasSSE2=0;
     caps->hasSSE3=0;
     caps->hasSSSE3=0;
+    caps->hasSSE4=0;
+    caps->hasSSE42=0;
     caps->hasSSE4a=0;
+    caps->hasAVX=0;
     caps->isX86=0;
     caps->hasAltiVec = 0;
 #if HAVE_ALTIVEC
--- a/cpudetect.h	Fri Sep 14 14:13:54 2012 +0000
+++ b/cpudetect.h	Fri Sep 14 14:16:08 2012 +0000
@@ -36,7 +36,10 @@
     int hasSSE2;
     int hasSSE3;
     int hasSSSE3;
+    int hasSSE4;
+    int hasSSE42;
     int hasSSE4a;
+    int hasAVX;
     int isX86;
     unsigned cl_size; /* size of cache line */
     int hasAltiVec;