changeset 350:601822cc8c52

applied MMX2 memcpy() patch by Nick Kurshev
author arpi_esp
date Wed, 11 Apr 2001 12:47:45 +0000
parents 96793536a478
children 2c4ded6ffebc
files configure libvo/mmx.h libvo/vo_3dfx.c libvo/vo_fbdev.c libvo/vo_odivx.c libvo/vo_sdl.c libvo/vo_syncfb.c libvo/vo_x11.c libvo/vo_xv.c
diffstat 9 files changed, 129 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/configure	Wed Apr 11 12:46:49 2001 +0000
+++ b/configure	Wed Apr 11 12:47:45 2001 +0000
@@ -70,6 +70,7 @@
 params:
         --cc                    use this C compiler to build MPlayer [gcc]
         --enable-mmx            build with mmx support [autodetect]
+        --enable-mmx2           build with mmx2 support (PIII, Athlon) [autodetect]
         --enable-3dnow          build with 3dnow! support [autodetect]
         --enable-sse            build with sse support [autodetect]
         --enable-gl             build with OpenGL render support [autodetect]
@@ -153,6 +154,7 @@
 pstepping=`cat /proc/cpuinfo | grep 'stepping' | cut -d ':' -f 2 | cut -d ' ' -f 2`
 
 _mmx=no
+_mmx2=no
 _3dnow=no
 _mtrr=no
 _sse=no
@@ -193,6 +195,9 @@
   mmx)
         _mmx=yes
         ;;
+  mmxext)
+        _mmx2=yes
+        ;;
   mtrr)
         _mtrr=yes
         ;;
@@ -444,6 +449,9 @@
   --enable-mmx)
         _mmx=yes
         ;;
+  --enable-mmx2)
+        _mmx2=yes
+        ;;
   --enable-mtrr)
   	_mtrr=yes
 	;;
@@ -506,6 +514,7 @@
         ;;
   --disable-mmx)
         _mmx=no
+	_mmx2=no
         ;;
   --disable-mtrr)
   	_mtrr=no
@@ -573,6 +582,7 @@
 echo "Checking for cpu type ... $pname"
 echo "Optimizing to ... $proc"
 echo "Checking for mmx support ... $_mmx"
+echo "Checking for mmx2 support ... $_mmx2"
 echo "Checking for 3dnow support ... $_3dnow"
 echo "Checking for sse support ... $_sse"
 echo "Checking for mtrr support ... $_mtrr"
@@ -675,6 +685,12 @@
  _mmx='#undef HAVE_MMX'
 fi
 
+if [ "$_mmx2" = "yes" ]; then
+ _mmx2='#define HAVE_MMX2'
+else
+ _mmx2='#undef HAVE_MMX2'
+fi
+
 if [ $_3dnow = yes ]; then
  _3dnowm='#define HAVE_3DNOW'
 else
@@ -851,6 +867,7 @@
 $_mlib     // available only on solaris
 $_3dnowm   // only define if you have 3DNOW (AMD k6-2, AMD Athlon, iDT WinChip, etc.)
 $_mmx      // only define if you have MMX
+$_mmx2     // only define if you have MMX2
 $_ssem     // only define if you have SSE (Intel Pentium III or Celeron II)
 
 /* libvo options */
--- a/libvo/mmx.h	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/mmx.h	Wed Apr 11 12:47:45 2001 +0000
@@ -27,6 +27,104 @@
 #ifndef _MMX_H
 #define _MMX_H
 
+/*
+ This part of the code was taken from Linux-2.4.3 and slightly modified
+for the MMX2 instruction set. I did this because Linux uses page-aligned
+blocks, but mplayer uses weakly ordered data and the original sources cannot
+speed it up. Only using prefetch and movntq together has an effect!
+If you have questions, please contact me: Nick Kurshev: nickols_k@mail.ru.
+*/
+
+#ifndef HAVE_MMX2
+//static inline void * __memcpy(void * to, const void * from, unsigned n)
+inline static void * memcpy(void * to, const void * from, unsigned n)
+{	/* Variant built when HAVE_MMX2 is not defined: copies n bytes from 'from' to 'to' and returns 'to'. */
+int d0, d1, d2;	/* dummy outputs: tell the compiler the c, D and S registers are modified */
+__asm__ __volatile__(
+	"rep ; movsl\n\t"		/* copy n/4 dwords (count preloaded via operand 0) */
+	"testb $2,%b4\n\t"		/* bit 1 of n set: one more 16-bit word to copy */
+	"je 1f\n\t"
+	"movsw\n"
+	"1:\ttestb $1,%b4\n\t"	/* bit 0 of n set: one last byte to copy */
+	"je 2f\n\t"
+	"movsb\n"
+	"2:"
+	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+	:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)	/* count=n/4, %4=n, dest=to, src=from */
+	: "memory");
+return (to);
+}
+#else
+//inline static void *__memcpy_mmx2(void *to, const void *from, unsigned len)
+inline static void * memcpy(void * to, const void * from, unsigned n)
+{
+	/*
+	 * MMX2 memcpy replacement: copies n bytes from 'from' to 'to' and
+	 * returns the original 'to'.  The regions must not overlap.
+	 * Copies of at least 512 bytes move 64-byte blocks through mm0-mm3
+	 * with non-temporal stores; whatever is left (the whole copy when
+	 * n < 512) is finished with rep movsb.
+	 */
+	const unsigned char *src = from;
+	unsigned char *dst = to;
+	long d0, d1, d2;	/* dummy outputs: rep movsb modifies the c, D and S registers */
+	unsigned i;
+
+	if(n >= 0x200) /* MMX path only for copies of 512 bytes or more */
+	{
+	  i = n >> 6; /* number of whole 64-byte blocks */
+	  n &= 63;    /* only the remainder is left for the tail copy */
+	/*
+	 * prefetchnta comes from the same extension as movntq, so it is
+	 * usable wherever HAVE_MMX2 is set.  The kernel's .fixup/__ex_table
+	 * recovery was dropped: nothing consults it in a user-space program.
+	 */
+	__asm__ __volatile__ (
+		"prefetchnta (%0)\n"
+		"prefetchnta 64(%0)\n"
+		"prefetchnta 128(%0)\n"
+		"prefetchnta 192(%0)\n"
+		"prefetchnta 256(%0)\n"
+		: : "r" (src) );
+
+	for(; i>0; i--)
+	{
+		__asm__ __volatile__ (
+		"prefetchnta 320(%0)\n"
+		"movq (%0), %%mm0\n"
+		"movq 8(%0), %%mm1\n"
+		"movq 16(%0), %%mm2\n"
+		"movq 24(%0), %%mm3\n"
+		"movntq %%mm0, (%1)\n"
+		"movntq %%mm1, 8(%1)\n"
+		"movntq %%mm2, 16(%1)\n"
+		"movntq %%mm3, 24(%1)\n"
+		"movq 32(%0), %%mm0\n"
+		"movq 40(%0), %%mm1\n"
+		"movq 48(%0), %%mm2\n"
+		"movq 56(%0), %%mm3\n"
+		"movntq %%mm0, 32(%1)\n"
+		"movntq %%mm1, 40(%1)\n"
+		"movntq %%mm2, 48(%1)\n"
+		"movntq %%mm3, 56(%1)\n"
+		: : "r" (src), "r" (dst) : "memory");
+		src+=64;
+		dst+=64;
+	}
+	        __asm__ __volatile__ ("emms":::"memory");
+	}
+	/*
+	 *	Now do the tail of the block
+	 */
+	__asm__ __volatile__(
+		"rep ; movsb"
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		: "0" ((long) n), "1" ((long) dst), "2" ((long) src)
+		: "memory");
+	return to;
+}
+#endif
+
 
 /*	Warning:  at this writing, the version of GAS packaged
 	with most Linux distributions does not handle the
--- a/libvo/vo_3dfx.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_3dfx.c	Wed Apr 11 12:47:45 2001 +0000
@@ -49,6 +49,8 @@
 
 #include "drivers/3dfx.h"
 
+#include "mmx.h"
+
 static vo_info_t vo_info = 
 {
 	"3dfx (/dev/3dfx)",
--- a/libvo/vo_fbdev.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_fbdev.c	Wed Apr 11 12:47:45 2001 +0000
@@ -24,6 +24,8 @@
 #include "yuv2rgb.h"
 extern void rgb15to16_mmx(char *s0, char *d0, int count);
 
+#include "mmx.h"
+
 LIBVO_EXTERN(fbdev)
 
 static vo_info_t vo_info = {
--- a/libvo/vo_odivx.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_odivx.c	Wed Apr 11 12:47:45 2001 +0000
@@ -19,6 +19,8 @@
 
 #include "../encore/encore.h"
 
+#include "mmx.h"
+
 static vo_info_t vo_info = 
 {
 	"OpenDivX AVI File writer",
--- a/libvo/vo_sdl.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_sdl.c	Wed Apr 11 12:47:45 2001 +0000
@@ -68,6 +68,8 @@
 #include "video_out.h"
 #include "video_out_internal.h"
 
+
+#include "mmx.h"
 LIBVO_EXTERN(sdl)
 
 //#include "log.h"
--- a/libvo/vo_syncfb.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_syncfb.c	Wed Apr 11 12:47:45 2001 +0000
@@ -43,6 +43,8 @@
 
 #include "drivers/syncfb/syncfb.h"
 
+#include "mmx.h"
+
 static vo_info_t vo_info =
 {
 	"Matrox G200/G400 Synchronous framebuffer (/dev/syncfb)",
--- a/libvo/vo_x11.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_x11.c	Wed Apr 11 12:47:45 2001 +0000
@@ -36,6 +36,8 @@
 
 #include "x11_common.h"
 
+#include "mmx.h"
+
 static vo_info_t vo_info =
 {
         "X11 ( XImage/Shm )",
--- a/libvo/vo_xv.c	Wed Apr 11 12:46:49 2001 +0000
+++ b/libvo/vo_xv.c	Wed Apr 11 12:47:45 2001 +0000
@@ -28,6 +28,8 @@
 
 #include "x11_common.h"
 
+#include "mmx.h"
+
 static vo_info_t vo_info =
 {
         "X11/Xv",