diff mp3lib/dct64_3dnow.c @ 18833:c452bd0d6ede

Fix float-to-int conversion to use saturating ops (packssdw after pf2id). Patch by Zuxy Meng < zuxy PP meng AH gmail PP com >. Original thread: Date: Jun 21, 2006 2:50 PM, Subject: [MPlayer-dev-eng] [PATCH] Saturation & PSWAPD bugfix in mp3lib/dct64_3dnow.c & mp3lib/dct64_k7.c
author gpoirier
date Tue, 27 Jun 2006 05:51:45 +0000
parents e7a129082fda
children 9a90621765b0
line wrap: on
line diff
--- a/mp3lib/dct64_3dnow.c	Mon Jun 26 22:00:44 2006 +0000
+++ b/mp3lib/dct64_3dnow.c	Tue Jun 27 05:51:45 2006 +0000
@@ -746,21 +746,23 @@
 "	pfacc	%%mm1, %%mm0\n\t"
 "	pfmul	%%mm6, %%mm0\n\t"
 "	pf2id	%%mm0, %%mm0\n\t"
+"	packssdw %%mm0, %%mm0\n\t"
 "	movd	%%mm0, %%eax\n\t"
 "	movw    %%ax, 512(%%esi)\n\t"
-"	psrlq	$32, %%mm0\n\t"
-"	movd	%%mm0, %%eax\n\t"
+"	shrl	$16, %%eax\n\t"
 "	movw    %%ax, (%%esi)\n\t"
 
 "	movd    12(%%ecx), %%mm0\n\t"
 "	pfsub    8(%%ecx), %%mm0\n\t"
 "	pfmul  120(%%ebx), %%mm0\n\t"
 "	pf2id    %%mm0, %%mm7\n\t"
+"	packssdw %%mm7, %%mm7\n\t"
 "	movd	 %%mm7, %%eax\n\t"
 "	movw     %%ax, 256(%%edi)\n\t"
 "	pfadd   12(%%ecx), %%mm0\n\t"
 "	pfadd    8(%%ecx), %%mm0\n\t"
 "	pf2id    %%mm0, %%mm0\n\t"
+"	packssdw %%mm0, %%mm0\n\t"
 "	movd	 %%mm0, %%eax\n\t"
 "	movw     %%ax, 256(%%esi)\n\t"
 
@@ -775,6 +777,7 @@
 "	movq   %%mm2, %%mm1\n\t"
 
 "	pf2id  %%mm2, %%mm7\n\t"
+"	packssdw %%mm7, %%mm7\n\t"
 "	movd   %%mm7, %%eax\n\t"
 "	movw   %%ax, 384(%%edi)\n\t"
 
@@ -785,14 +788,17 @@
 "	pfadd  16(%%ecx), %%mm0\n\t"
 "	pfadd  20(%%ecx), %%mm0\n\t"
 "	pf2id  %%mm0, %%mm0\n\t"
+"	packssdw %%mm0, %%mm0\n\t"
 "	movd   %%mm0, %%eax\n\t"
 "	movw   %%ax, 384(%%esi)\n\t"
 "	pfadd  %%mm3, %%mm1\n\t"
 "	pf2id  %%mm1, %%mm1\n\t"
+"	packssdw %%mm1, %%mm1\n\t"
 "	movd   %%mm1, %%eax\n\t"
 "	movw   %%ax, 128(%%esi)\n\t"
 "	pfadd  %%mm3, %%mm2\n\t"
 "	pf2id  %%mm2, %%mm2\n\t"
+"	packssdw %%mm2, %%mm2\n\t"
 "	movd   %%mm2, %%eax\n\t"
 "	movw   %%ax, 128(%%edi)\n\t"
 
@@ -804,14 +810,14 @@
 "	pfadd   40(%%edx), %%mm1\n\t"
 "	pf2id   %%mm0, %%mm0\n\t"
 "	pf2id   %%mm1, %%mm1\n\t"
+"	packssdw %%mm0, %%mm0\n\t"
+"	packssdw %%mm1, %%mm1\n\t"
 "	movd	%%mm0, %%eax\n\t"
 "	movd	%%mm1, %%ecx\n\t"
 "	movw    %%ax, 448(%%esi)\n\t"
 "	movw    %%cx, 320(%%esi)\n\t"
-"	psrlq   $32, %%mm0\n\t"
-"	psrlq   $32, %%mm1\n\t"
-"	movd	%%mm0, %%eax\n\t"
-"	movd	%%mm1, %%ecx\n\t"
+"	shrl	$16, %%eax\n\t"
+"	shrl	$16, %%ecx\n\t"
 "	movw    %%ax, 64(%%edi)\n\t"
 "	movw    %%cx, 192(%%edi)\n\t"
 
@@ -829,28 +835,30 @@
 "	pfadd  %%mm4, %%mm3\n\t"
 "	pf2id  %%mm0, %%mm1\n\t"
 "	pf2id  %%mm3, %%mm3\n\t"
+"	packssdw %%mm1, %%mm1\n\t"
+"	packssdw %%mm3, %%mm3\n\t"
 "	pfadd  88(%%edx), %%mm5\n\t"
 "	movd   %%mm1, %%eax\n\t"
 "	movd   %%mm3, %%ecx\n\t"
 "	movw   %%ax, 448(%%edi)\n\t"
 "	movw   %%cx, 192(%%esi)\n\t"
 "	pf2id  %%mm5, %%mm5\n\t"
-"	psrlq  $32, %%mm1\n\t"
-"       psrlq  $32, %%mm3\n\t"
+"	packssdw %%mm5, %%mm5\n\t"
+"	shrl   $16, %%eax\n\t"
+"	shrl   $16, %%ecx\n\t"
 "	movd   %%mm5, %%ebx\n\t"
-"	movd   %%mm1, %%eax\n\t"
-"	movd   %%mm3, %%ecx\n\t"
 "	movw   %%bx, 96(%%esi)\n\t"
 "	movw   %%ax, 480(%%edi)\n\t"
 "	movw   %%cx, 64(%%esi)\n\t"
 "	pfadd  %%mm2, %%mm0\n\t"
 "	pf2id  %%mm0, %%mm0\n\t"
+"	packssdw %%mm0, %%mm0\n\t"
 "	movd   %%mm0, %%eax\n\t"
 "	pfadd  68(%%edx), %%mm6\n\t"
 "	movw   %%ax, 320(%%edi)\n\t"
-"	psrlq  $32, %%mm0\n\t"
+"	shrl   $16, %%eax\n\t"
 "	pf2id  %%mm6, %%mm6\n\t"
-"	movd   %%mm0, %%eax\n\t"
+"	packssdw %%mm6, %%mm6\n\t"
 "	movd   %%mm6, %%ebx\n\t"
 "	movw   %%ax, 416(%%edi)\n\t"
 "	movw   %%bx, 32(%%esi)\n\t"
@@ -870,18 +878,18 @@
 "	pf2id  %%mm0, %%mm0\n\t"
 "	pf2id  %%mm2, %%mm2\n\t"
 "	pf2id  %%mm4, %%mm4\n\t"
+"	packssdw %%mm0, %%mm0\n\t"
+"	packssdw %%mm2, %%mm2\n\t"
+"	packssdw %%mm4, %%mm4\n\t"
 "	movd   %%mm0, %%eax\n\t"
 "	movd   %%mm2, %%ecx\n\t"
 "	movd   %%mm4, %%ebx\n\t"
 "	movw   %%ax, 480(%%esi)\n\t"
 "	movw   %%cx, 352(%%esi)\n\t"
 "	movw   %%bx, 224(%%esi)\n\t"
-"	psrlq  $32, %%mm0\n\t"
-"	psrlq  $32, %%mm2\n\t"
-"	psrlq  $32, %%mm4\n\t"
-"	movd   %%mm0, %%eax\n\t"
-"	movd   %%mm2, %%ecx\n\t"
-"	movd   %%mm4, %%ebx\n\t"
+"	shrl   $16, %%eax\n\t"
+"	shrl   $16, %%ecx\n\t"
+"	shrl   $16, %%ebx\n\t"
 "	movw   %%ax, 32(%%edi)\n\t"
 "	movw   %%cx, 160(%%edi)\n\t"
 "	movw   %%bx, 288(%%edi)\n\t"
@@ -891,15 +899,18 @@
 "	pf2id  %%mm1, %%mm1\n\t"
 "	pf2id  %%mm3, %%mm3\n\t"
 "	pf2id  %%mm5, %%mm5\n\t"
+"	packssdw %%mm1, %%mm1\n\t"
+"	packssdw %%mm3, %%mm3\n\t"
+"	packssdw %%mm5, %%mm5\n\t"
 "	movd   %%mm1, %%eax\n\t"
 "	movd   %%mm3, %%ecx\n\t"
 "	movd   %%mm5, %%ebx\n\t"
 "	movw   %%ax, 416(%%esi)\n\t"
 "	movw   %%cx, 288(%%esi)\n\t"
 "	movw   %%bx, 160(%%esi)\n\t"
-"	psrlq  $32, %%mm1\n\t"
-"	psrlq  $32, %%mm3\n\t"
-"	psrlq  $32, %%mm5\n\t"
+"	psrlq  $16, %%mm1\n\t"
+"	psrlq  $16, %%mm3\n\t"
+"	psrlq  $16, %%mm5\n\t"
 "	movd   %%mm1, %%eax\n\t"
 "	movd   %%mm3, %%ecx\n\t"
 "	movd   %%mm5, %%ebx\n\t"