Mercurial > mplayer.hg
view mp3lib/dct64_3dnow.c @ 34478:8e09f1cb3ecd
Fix vo_gl unsharp filter for chroma.
The syntax is a bit strange, since for inputs the components
indicate swizzles, while for outputs it is only a write mask,
thus the result must be at the correct position regardless
of the component specified for the output.
So use a 3-component vector for the constant factor.
Also make the input swizzles explicit in an attempt to make
the code less confusing (that part does not change what the code
actually does).
Previous code would result in a filter strength of 0 always
being used for chroma.
author | reimar |
---|---|
date | Sat, 14 Jan 2012 15:49:54 +0000 |
parents | d0f70692a140 |
children |
line wrap: on
line source
/*
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 * Partial 3dnow! optimization by Nick Kurshev
 *
 * TODO: optimize scalar 3dnow! code
 * Warning: Phases 7 & 8 are not tested
 */

#include "config.h"
#include "mangle.h"
#include "mpg123.h"
#include "libavutil/x86_cpu.h"

/* 64-bit constant with only bit 63 set.  It is XORed into an MMX register
 * to flip the sign bit of the upper 32-bit float lane and leave the lower
 * lane untouched. */
static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL;
/* 1.0f, loaded into the low lane of mm6 in phase 6. */
static float attribute_used plus_1f = 1.0;

/**
 * Hand-written MMX/3DNow! inline-assembly routine.  Going by its name it is
 * presumably the 64-point DCT of the mpg123 synthesis filter; that cannot be
 * confirmed from this file alone.
 *
 * @param a  output base: 16-bit results are stored at byte offsets
 *           0, 32, 64, ..., 512 (phases 9 and 10)
 * @param b  output base: 16-bit results are stored at byte offsets
 *           32, 64, ..., 480; the closing movsw copies the 16-bit word at
 *           byte offset 0 of a to byte offset 0 of b
 * @param c  input: 128 bytes are read from it in phase 1 and processed with
 *           packed single-precision float instructions (assumes `real` is a
 *           32-bit float -- TODO confirm against mpg123.h)
 *
 * Asm operands: %0 = a, %1 = b, %2 = c, %3 = tmp[0].
 *
 * Register roles set up by the prologue:
 *   REG_a = c (input), REG_S = a, REG_D = b,
 *   REG_b = address of costab_mmx (defined outside this file; presumably a
 *           cosine coefficient table -- byte offsets 0..127 are read),
 *   REG_c = tmp, REG_d = tmp + 128.
 * Phases 1-6 bounce the data between the two 128-byte halves of tmp.
 * Phases 9 and 10 convert to integers (pf2id), saturate to 16 bits
 * (packssdw) and store; they reuse REG_a, REG_b and REG_c as integer
 * scratch, so the table and tmp pointers held there are gone afterwards.
 *
 * The routine ends with femms, which leaves the MMX state cleared.
 */
void dct64_MMX_3dnow(short *a,short *b,real *c)
{
  /* 256 bytes of scratch plus 8 bytes of padding.  Near the end of phase 6,
   * "pfadd 124(REG_d)" takes a 64-bit memory operand at tmp+252 and thus
   * touches tmp[252..259]; only the low 32 bits are used, but without the
   * padding the access would run 4 bytes past the end of the array. */
  char tmp[256 + 8];
    __asm__ volatile(
" mov %2,%%"REG_a"\n\t"
" lea 128+%3,%%"REG_d"\n\t"
" mov %0,%%"REG_S"\n\t"
" mov %1,%%"REG_D"\n\t"
" mov $"MANGLE(costab_mmx)",%%"REG_b"\n\t"
" lea %3,%%"REG_c"\n\t"

/* Phase 1: reads the 128 input bytes at REG_a.  Sums are written to
 * REG_d+0..63; differences, multiplied by costab_mmx+0..63, are written
 * to REG_d+64..127. */
" movq (%%"REG_a"), %%mm0\n\t"
" movq 8(%%"REG_a"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 120(%%"REG_a"), %%mm1\n\t"
" movq 112(%%"REG_a"), %%mm5\n\t"
/* n.b.: pswapd -- the next six instructions swap the two 32-bit halves of
 * mm1 and of mm5, using mm2/mm6 as scratch.  The same sequence recurs in
 * every block of phases 1-5. */
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%"REG_d")\n\t"
" movq %%mm4, 8(%%"REG_d")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul (%%"REG_b"), %%mm3\n\t"
" pfmul 8(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 124(%%"REG_d")\n\t"
" movd %%mm7, 116(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 120(%%"REG_d")\n\t"
" movd %%mm7, 112(%%"REG_d")\n\t"

" movq 16(%%"REG_a"), %%mm0\n\t"
" movq 24(%%"REG_a"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 104(%%"REG_a"), %%mm1\n\t"
" movq 96(%%"REG_a"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 16(%%"REG_d")\n\t"
" movq %%mm4, 24(%%"REG_d")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 16(%%"REG_b"), %%mm3\n\t"
" pfmul 24(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 108(%%"REG_d")\n\t"
" movd %%mm7, 100(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 104(%%"REG_d")\n\t"
" movd %%mm7, 96(%%"REG_d")\n\t"

" movq 32(%%"REG_a"), %%mm0\n\t"
" movq 40(%%"REG_a"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 88(%%"REG_a"), %%mm1\n\t"
" movq 80(%%"REG_a"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%"REG_d")\n\t"
" movq %%mm4, 40(%%"REG_d")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 32(%%"REG_b"), %%mm3\n\t"
" pfmul 40(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 92(%%"REG_d")\n\t"
" movd %%mm7, 84(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 88(%%"REG_d")\n\t"
" movd %%mm7, 80(%%"REG_d")\n\t"

" movq 48(%%"REG_a"), %%mm0\n\t"
" movq 56(%%"REG_a"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 72(%%"REG_a"), %%mm1\n\t"
" movq 64(%%"REG_a"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 48(%%"REG_d")\n\t"
" movq %%mm4, 56(%%"REG_d")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 48(%%"REG_b"), %%mm3\n\t"
" pfmul 56(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 76(%%"REG_d")\n\t"
" movd %%mm7, 68(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 72(%%"REG_d")\n\t"
" movd %%mm7, 64(%%"REG_d")\n\t"

/* Phase 2: reads REG_d+0..63, writes REG_c+0..63; multipliers are taken
 * from costab_mmx+64..95. */
" movq (%%"REG_d"), %%mm0\n\t"
" movq 8(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 56(%%"REG_d"), %%mm1\n\t"
" movq 48(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%"REG_c")\n\t"
" movq %%mm4, 8(%%"REG_c")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 64(%%"REG_b"), %%mm3\n\t"
" pfmul 72(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 60(%%"REG_c")\n\t"
" movd %%mm7, 52(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 56(%%"REG_c")\n\t"
" movd %%mm7, 48(%%"REG_c")\n\t"

" movq 16(%%"REG_d"), %%mm0\n\t"
" movq 24(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 40(%%"REG_d"), %%mm1\n\t"
" movq 32(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 16(%%"REG_c")\n\t"
" movq %%mm4, 24(%%"REG_c")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 80(%%"REG_b"), %%mm3\n\t"
" pfmul 88(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 44(%%"REG_c")\n\t"
" movd %%mm7, 36(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 40(%%"REG_c")\n\t"
" movd %%mm7, 32(%%"REG_c")\n\t"

/* Phase 3: reads REG_d+64..127, writes REG_c+64..127; same multipliers as
 * phase 2 (costab_mmx+64..95), but with the reversed subtraction pfsubr. */
" movq 64(%%"REG_d"), %%mm0\n\t"
" movq 72(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 120(%%"REG_d"), %%mm1\n\t"
" movq 112(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%"REG_c")\n\t"
" movq %%mm4, 72(%%"REG_c")\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 64(%%"REG_b"), %%mm3\n\t"
" pfmul 72(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 124(%%"REG_c")\n\t"
" movd %%mm7, 116(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 120(%%"REG_c")\n\t"
" movd %%mm7, 112(%%"REG_c")\n\t"

" movq 80(%%"REG_d"), %%mm0\n\t"
" movq 88(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 104(%%"REG_d"), %%mm1\n\t"
" movq 96(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 80(%%"REG_c")\n\t"
" movq %%mm4, 88(%%"REG_c")\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 80(%%"REG_b"), %%mm3\n\t"
" pfmul 88(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 108(%%"REG_c")\n\t"
" movd %%mm7, 100(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 104(%%"REG_c")\n\t"
" movd %%mm7, 96(%%"REG_c")\n\t"

/* Phase 4: reads REG_c in four 32-byte groups and writes the matching
 * group of REG_d; multipliers are costab_mmx+96..111.  Groups alternate
 * between pfsub and pfsubr. */
" movq (%%"REG_c"), %%mm0\n\t"
" movq 8(%%"REG_c"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 24(%%"REG_c"), %%mm1\n\t"
" movq 16(%%"REG_c"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%"REG_d")\n\t"
" movq %%mm4, 8(%%"REG_d")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 96(%%"REG_b"), %%mm3\n\t"
" pfmul 104(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 28(%%"REG_d")\n\t"
" movd %%mm7, 20(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 24(%%"REG_d")\n\t"
" movd %%mm7, 16(%%"REG_d")\n\t"

" movq 32(%%"REG_c"), %%mm0\n\t"
" movq 40(%%"REG_c"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 56(%%"REG_c"), %%mm1\n\t"
" movq 48(%%"REG_c"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%"REG_d")\n\t"
" movq %%mm4, 40(%%"REG_d")\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 96(%%"REG_b"), %%mm3\n\t"
" pfmul 104(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 60(%%"REG_d")\n\t"
" movd %%mm7, 52(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 56(%%"REG_d")\n\t"
" movd %%mm7, 48(%%"REG_d")\n\t"

" movq 64(%%"REG_c"), %%mm0\n\t"
" movq 72(%%"REG_c"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 88(%%"REG_c"), %%mm1\n\t"
" movq 80(%%"REG_c"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%"REG_d")\n\t"
" movq %%mm4, 72(%%"REG_d")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsub %%mm5, %%mm7\n\t"
" pfmul 96(%%"REG_b"), %%mm3\n\t"
" pfmul 104(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 92(%%"REG_d")\n\t"
" movd %%mm7, 84(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 88(%%"REG_d")\n\t"
" movd %%mm7, 80(%%"REG_d")\n\t"

" movq 96(%%"REG_c"), %%mm0\n\t"
" movq 104(%%"REG_c"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 120(%%"REG_c"), %%mm1\n\t"
" movq 112(%%"REG_c"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 96(%%"REG_d")\n\t"
" movq %%mm4, 104(%%"REG_d")\n\t"
" pfsubr %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 96(%%"REG_b"), %%mm3\n\t"
" pfmul 104(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 124(%%"REG_d")\n\t"
" movd %%mm7, 116(%%"REG_d")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 120(%%"REG_d")\n\t"
" movd %%mm7, 112(%%"REG_d")\n\t"

/* Phase 5: reads REG_d in 16-byte pairs and writes REG_c.  Both
 * differences are multiplied by costab_mmx+112; mm3 uses pfsub and mm7
 * uses pfsubr. */
" movq (%%"REG_d"), %%mm0\n\t"
" movq 16(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 8(%%"REG_d"), %%mm1\n\t"
" movq 24(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, (%%"REG_c")\n\t"
" movq %%mm4, 16(%%"REG_c")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%"REG_b"), %%mm3\n\t"
" pfmul 112(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 12(%%"REG_c")\n\t"
" movd %%mm7, 28(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 8(%%"REG_c")\n\t"
" movd %%mm7, 24(%%"REG_c")\n\t"

" movq 32(%%"REG_d"), %%mm0\n\t"
" movq 48(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 40(%%"REG_d"), %%mm1\n\t"
" movq 56(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 32(%%"REG_c")\n\t"
" movq %%mm4, 48(%%"REG_c")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%"REG_b"), %%mm3\n\t"
" pfmul 112(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 44(%%"REG_c")\n\t"
" movd %%mm7, 60(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 40(%%"REG_c")\n\t"
" movd %%mm7, 56(%%"REG_c")\n\t"

" movq 64(%%"REG_d"), %%mm0\n\t"
" movq 80(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 72(%%"REG_d"), %%mm1\n\t"
" movq 88(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 64(%%"REG_c")\n\t"
" movq %%mm4, 80(%%"REG_c")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%"REG_b"), %%mm3\n\t"
" pfmul 112(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 76(%%"REG_c")\n\t"
" movd %%mm7, 92(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 72(%%"REG_c")\n\t"
" movd %%mm7, 88(%%"REG_c")\n\t"

" movq 96(%%"REG_d"), %%mm0\n\t"
" movq 112(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm3\n\t"
" movq %%mm4, %%mm7\n\t"
" movq 104(%%"REG_d"), %%mm1\n\t"
" movq 120(%%"REG_d"), %%mm5\n\t"
/* n.b.: pswapd*/
" movq %%mm1, %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm5\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" punpckldq %%mm6, %%mm5\n\t"
/**/
" pfadd %%mm1, %%mm0\n\t"
" pfadd %%mm5, %%mm4\n\t"
" movq %%mm0, 96(%%"REG_c")\n\t"
" movq %%mm4, 112(%%"REG_c")\n\t"
" pfsub %%mm1, %%mm3\n\t"
" pfsubr %%mm5, %%mm7\n\t"
" pfmul 112(%%"REG_b"), %%mm3\n\t"
" pfmul 112(%%"REG_b"), %%mm7\n\t"
" movd %%mm3, 108(%%"REG_c")\n\t"
" movd %%mm7, 124(%%"REG_c")\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm7\n\t"
" movd %%mm3, 104(%%"REG_c")\n\t"
" movd %%mm7, 120(%%"REG_c")\n\t"

/* Phase 6. This is the end of easy road. */
/* Code below is coded in scalar mode. Should be optimized */
/* Reads REG_c+32..127 and writes REG_d+32..127; the only table entry used
 * from here on is the float at costab_mmx+120.  Where a pfadd/pfsub/pfmul
 * takes a memory operand, only its low lane is meaningful here. */
" movd "MANGLE(plus_1f)", %%mm6\n\t"
" punpckldq 120(%%"REG_b"), %%mm6\n\t" /* mm6 = 1.0 | 120(%%"REG_b")*/
" movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */

/* pxor with mm7 negates the upper float of the copy, so pfacc produces
 * (lo+hi | lo-hi); pfmul by mm6 then scales only the upper lane. */
" movq 32(%%"REG_c"), %%mm0\n\t"
" movq 64(%%"REG_c"), %%mm2\n\t"
" movq %%mm0, %%mm1\n\t"
" movq %%mm2, %%mm3\n\t"
" pxor %%mm7, %%mm1\n\t"
" pxor %%mm7, %%mm3\n\t"
" pfacc %%mm1, %%mm0\n\t"
" pfacc %%mm3, %%mm2\n\t"
" pfmul %%mm6, %%mm0\n\t"
" pfmul %%mm6, %%mm2\n\t"
" movq %%mm0, 32(%%"REG_d")\n\t"
" movq %%mm2, 64(%%"REG_d")\n\t"

" movd 44(%%"REG_c"), %%mm0\n\t"
" movd 40(%%"REG_c"), %%mm2\n\t"
" movd 120(%%"REG_b"), %%mm3\n\t"
" punpckldq 76(%%"REG_c"), %%mm0\n\t"
" punpckldq 72(%%"REG_c"), %%mm2\n\t"
" punpckldq %%mm3, %%mm3\n\t"
" movq %%mm0, %%mm4\n\t"
" movq %%mm2, %%mm5\n\t"
" pfsub %%mm2, %%mm0\n\t"
" pfmul %%mm3, %%mm0\n\t"
" movq %%mm0, %%mm1\n\t"
" pfadd %%mm5, %%mm0\n\t"
" pfadd %%mm4, %%mm0\n\t"
" movq %%mm0, %%mm2\n\t"
" punpckldq %%mm1, %%mm0\n\t"
" punpckhdq %%mm1, %%mm2\n\t"
" movq %%mm0, 40(%%"REG_d")\n\t"
" movq %%mm2, 72(%%"REG_d")\n\t"

" movd 48(%%"REG_c"), %%mm3\n\t"
" movd 60(%%"REG_c"), %%mm2\n\t"
" pfsub 52(%%"REG_c"), %%mm3\n\t"
" pfsub 56(%%"REG_c"), %%mm2\n\t"
" pfmul 120(%%"REG_b"), %%mm3\n\t"
" pfmul 120(%%"REG_b"), %%mm2\n\t"
" movq %%mm2, %%mm1\n\t"

" pfadd 56(%%"REG_c"), %%mm1\n\t"
" pfadd 60(%%"REG_c"), %%mm1\n\t"
" movq %%mm1, %%mm0\n\t"

" pfadd 48(%%"REG_c"), %%mm0\n\t"
" pfadd 52(%%"REG_c"), %%mm0\n\t"
" pfadd %%mm3, %%mm1\n\t"
" punpckldq %%mm2, %%mm1\n\t"
" pfadd %%mm3, %%mm2\n\t"
" punpckldq %%mm2, %%mm0\n\t"
" movq %%mm1, 56(%%"REG_d")\n\t"
" movq %%mm0, 48(%%"REG_d")\n\t"

/*---*/

" movd 92(%%"REG_c"), %%mm1\n\t"
" pfsub 88(%%"REG_c"), %%mm1\n\t"
" pfmul 120(%%"REG_b"), %%mm1\n\t"
" movd %%mm1, 92(%%"REG_d")\n\t"
" pfadd 92(%%"REG_c"), %%mm1\n\t"
" pfadd 88(%%"REG_c"), %%mm1\n\t"
" movq %%mm1, %%mm0\n\t"

" pfadd 80(%%"REG_c"), %%mm0\n\t"
" pfadd 84(%%"REG_c"), %%mm0\n\t"
" movd %%mm0, 80(%%"REG_d")\n\t"

" movd 80(%%"REG_c"), %%mm0\n\t"
" pfsub 84(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" pfadd %%mm0, %%mm1\n\t"
" pfadd 92(%%"REG_d"), %%mm0\n\t"
" punpckldq %%mm1, %%mm0\n\t"
" movq %%mm0, 84(%%"REG_d")\n\t"

" movq 96(%%"REG_c"), %%mm0\n\t"
" movq %%mm0, %%mm1\n\t"
" pxor %%mm7, %%mm1\n\t"
" pfacc %%mm1, %%mm0\n\t"
" pfmul %%mm6, %%mm0\n\t"
" movq %%mm0, 96(%%"REG_d")\n\t"

" movd 108(%%"REG_c"), %%mm0\n\t"
" pfsub 104(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" movd %%mm0, 108(%%"REG_d")\n\t"
" pfadd 104(%%"REG_c"), %%mm0\n\t"
" pfadd 108(%%"REG_c"), %%mm0\n\t"
" movd %%mm0, 104(%%"REG_d")\n\t"

" movd 124(%%"REG_c"), %%mm1\n\t"
" pfsub 120(%%"REG_c"), %%mm1\n\t"
" pfmul 120(%%"REG_b"), %%mm1\n\t"
" movd %%mm1, 124(%%"REG_d")\n\t"
" pfadd 120(%%"REG_c"), %%mm1\n\t"
" pfadd 124(%%"REG_c"), %%mm1\n\t"
" movq %%mm1, %%mm0\n\t"

" pfadd 112(%%"REG_c"), %%mm0\n\t"
" pfadd 116(%%"REG_c"), %%mm0\n\t"
" movd %%mm0, 112(%%"REG_d")\n\t"

" movd 112(%%"REG_c"), %%mm0\n\t"
" pfsub 116(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" pfadd %%mm0,%%mm1\n\t"
/* 64-bit operand at tmp+252: this is the access the padding in tmp covers;
 * the high lane it loads is dropped by the punpckldq that follows. */
" pfadd 124(%%"REG_d"), %%mm0\n\t"
" punpckldq %%mm1, %%mm0\n\t"
" movq %%mm0, 116(%%"REG_d")\n\t"

// this code is broken, there is nothing modifying the z flag above.
#if 0
" jnz .L01\n\t"
/* Phase 7*/
/* Code below is coded in scalar mode. Should be optimized */

" movd (%%"REG_c"), %%mm0\n\t"
" pfadd 4(%%"REG_c"), %%mm0\n\t"
" movd %%mm0, 1024(%%"REG_S")\n\t"

" movd (%%"REG_c"), %%mm0\n\t"
" pfsub 4(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" movd %%mm0, (%%"REG_S")\n\t"
" movd %%mm0, (%%"REG_D")\n\t"

" movd 12(%%"REG_c"), %%mm0\n\t"
" pfsub 8(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" movd %%mm0, 512(%%"REG_D")\n\t"

" pfadd 12(%%"REG_c"), %%mm0\n\t"
" pfadd 8(%%"REG_c"), %%mm0\n\t"
" movd %%mm0, 512(%%"REG_S")\n\t"

" movd 16(%%"REG_c"), %%mm0\n\t"
" pfsub 20(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" movq %%mm0, %%mm3\n\t"

" movd 28(%%"REG_c"), %%mm0\n\t"
" pfsub 24(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" movd %%mm0, 768(%%"REG_D")\n\t"
" movq %%mm0, %%mm2\n\t"

" pfadd 24(%%"REG_c"), %%mm0\n\t"
" pfadd 28(%%"REG_c"), %%mm0\n\t"
" movq %%mm0, %%mm1\n\t"

" pfadd 16(%%"REG_c"), %%mm0\n\t"
" pfadd 20(%%"REG_c"), %%mm0\n\t"
" movd %%mm0, 768(%%"REG_S")\n\t"
" pfadd %%mm3, %%mm1\n\t"
" movd %%mm1, 256(%%"REG_S")\n\t"
" pfadd %%mm3, %%mm2\n\t"
" movd %%mm2, 256(%%"REG_D")\n\t"

/* Phase 8*/

" movq 32(%%"REG_d"), %%mm0\n\t"
" movq 48(%%"REG_d"), %%mm1\n\t"
" pfadd 48(%%"REG_d"), %%mm0\n\t"
" pfadd 40(%%"REG_d"), %%mm1\n\t"
" movd %%mm0, 896(%%"REG_S")\n\t"
" movd %%mm1, 640(%%"REG_S")\n\t"
" psrlq $32, %%mm0\n\t"
" psrlq $32, %%mm1\n\t"
" movd %%mm0, 128(%%"REG_D")\n\t"
" movd %%mm1, 384(%%"REG_D")\n\t"

" movd 40(%%"REG_d"), %%mm0\n\t"
" pfadd 56(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 384(%%"REG_S")\n\t"

" movd 56(%%"REG_d"), %%mm0\n\t"
" pfadd 36(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 128(%%"REG_S")\n\t"

" movd 60(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 896(%%"REG_D")\n\t"
" pfadd 44(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 640(%%"REG_D")\n\t"

" movq 96(%%"REG_d"), %%mm0\n\t"
" movq 112(%%"REG_d"), %%mm2\n\t"
" movq 104(%%"REG_d"), %%mm4\n\t"
" pfadd 112(%%"REG_d"), %%mm0\n\t"
" pfadd 104(%%"REG_d"), %%mm2\n\t"
" pfadd 120(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm1\n\t"
" movq %%mm2, %%mm3\n\t"
" movq %%mm4, %%mm5\n\t"
" pfadd 64(%%"REG_d"), %%mm0\n\t"
" pfadd 80(%%"REG_d"), %%mm2\n\t"
" pfadd 72(%%"REG_d"), %%mm4\n\t"
" movd %%mm0, 960(%%"REG_S")\n\t"
" movd %%mm2, 704(%%"REG_S")\n\t"
" movd %%mm4, 448(%%"REG_S")\n\t"
" psrlq $32, %%mm0\n\t"
" psrlq $32, %%mm2\n\t"
" psrlq $32, %%mm4\n\t"
" movd %%mm0, 64(%%"REG_D")\n\t"
" movd %%mm2, 320(%%"REG_D")\n\t"
" movd %%mm4, 576(%%"REG_D")\n\t"
" pfadd 80(%%"REG_d"), %%mm1\n\t"
" pfadd 72(%%"REG_d"), %%mm3\n\t"
" pfadd 88(%%"REG_d"), %%mm5\n\t"
" movd %%mm1, 832(%%"REG_S")\n\t"
" movd %%mm3, 576(%%"REG_S")\n\t"
" movd %%mm5, 320(%%"REG_S")\n\t"
" psrlq $32, %%mm1\n\t"
" psrlq $32, %%mm3\n\t"
" psrlq $32, %%mm5\n\t"
" movd %%mm1, 192(%%"REG_D")\n\t"
" movd %%mm3, 448(%%"REG_D")\n\t"
" movd %%mm5, 704(%%"REG_D")\n\t"

" movd 120(%%"REG_d"), %%mm0\n\t"
" pfadd 100(%%"REG_d"), %%mm0\n\t"
" movq %%mm0, %%mm1\n\t"
" pfadd 88(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 192(%%"REG_S")\n\t"
" pfadd 68(%%"REG_d"), %%mm1\n\t"
" movd %%mm1, 64(%%"REG_S")\n\t"

" movd 124(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 960(%%"REG_D")\n\t"
" pfadd 92(%%"REG_d"), %%mm0\n\t"
" movd %%mm0, 832(%%"REG_D")\n\t"

" jmp .L_bye\n\t"
".L01:\n\t"
#endif

/* Phase 9: converts the REG_c+0..31 results to saturated 16-bit integers
 * (pf2id + packssdw) and stores them through REG_S/REG_D.  REG_a is reused
 * as integer scratch and mm7 stops being the sign mask from here on. */

" movq (%%"REG_c"), %%mm0\n\t"
" movq %%mm0, %%mm1\n\t"
" pxor %%mm7, %%mm1\n\t"
" pfacc %%mm1, %%mm0\n\t"
" pfmul %%mm6, %%mm0\n\t"
" pf2id %%mm0, %%mm0\n\t"
" packssdw %%mm0, %%mm0\n\t"
" movd %%mm0, %%"REG_a"\n\t"
" movw %%ax, 512(%%"REG_S")\n\t"
" shr $16, %%"REG_a"\n\t"
" movw %%ax, (%%"REG_S")\n\t"

" movd 12(%%"REG_c"), %%mm0\n\t"
" pfsub 8(%%"REG_c"), %%mm0\n\t"
" pfmul 120(%%"REG_b"), %%mm0\n\t"
" pf2id %%mm0, %%mm7\n\t"
" packssdw %%mm7, %%mm7\n\t"
" movd %%mm7, %%"REG_a"\n\t"
" movw %%ax, 256(%%"REG_D")\n\t"

" pfadd 12(%%"REG_c"), %%mm0\n\t"
" pfadd 8(%%"REG_c"), %%mm0\n\t"
" pf2id %%mm0, %%mm0\n\t"
" packssdw %%mm0, %%mm0\n\t"
" movd %%mm0, %%"REG_a"\n\t"
" movw %%ax, 256(%%"REG_S")\n\t"

" movd 16(%%"REG_c"), %%mm3\n\t"
" pfsub 20(%%"REG_c"), %%mm3\n\t"
" pfmul 120(%%"REG_b"), %%mm3\n\t"
/* NOTE(review): this copy into mm2 is overwritten by the movd right after
 * it, so it appears to be dead. */
" movq %%mm3, %%mm2\n\t"

" movd 28(%%"REG_c"), %%mm2\n\t"
" pfsub 24(%%"REG_c"), %%mm2\n\t"
" pfmul 120(%%"REG_b"), %%mm2\n\t"
" movq %%mm2, %%mm1\n\t"

" pf2id %%mm2, %%mm7\n\t"
" packssdw %%mm7, %%mm7\n\t"
" movd %%mm7, %%"REG_a"\n\t"
" movw %%ax, 384(%%"REG_D")\n\t"

" pfadd 24(%%"REG_c"), %%mm1\n\t"
" pfadd 28(%%"REG_c"), %%mm1\n\t"
" movq %%mm1, %%mm0\n\t"

" pfadd 16(%%"REG_c"), %%mm0\n\t"
" pfadd 20(%%"REG_c"), %%mm0\n\t"
" pf2id %%mm0, %%mm0\n\t"
" packssdw %%mm0, %%mm0\n\t"
" movd %%mm0, %%"REG_a"\n\t"
" movw %%ax, 384(%%"REG_S")\n\t"
" pfadd %%mm3, %%mm1\n\t"
" pf2id %%mm1, %%mm1\n\t"
" packssdw %%mm1, %%mm1\n\t"
" movd %%mm1, %%"REG_a"\n\t"
" movw %%ax, 128(%%"REG_S")\n\t"
" pfadd %%mm3, %%mm2\n\t"
" pf2id %%mm2, %%mm2\n\t"
" packssdw %%mm2, %%mm2\n\t"
" movd %%mm2, %%"REG_a"\n\t"
" movw %%ax, 128(%%"REG_D")\n\t"

/* Phase 10: same conversion and store scheme for the REG_d+32..127
 * results.  REG_c and REG_b are overwritten with integer scratch values
 * here; only REG_d, REG_S and REG_D are still used as pointers. */

" movq 32(%%"REG_d"), %%mm0\n\t"
" movq 48(%%"REG_d"), %%mm1\n\t"
" pfadd 48(%%"REG_d"), %%mm0\n\t"
" pfadd 40(%%"REG_d"), %%mm1\n\t"
" pf2id %%mm0, %%mm0\n\t"
" pf2id %%mm1, %%mm1\n\t"
" packssdw %%mm0, %%mm0\n\t"
" packssdw %%mm1, %%mm1\n\t"
" movd %%mm0, %%"REG_a"\n\t"
" movd %%mm1, %%"REG_c"\n\t"
" movw %%ax, 448(%%"REG_S")\n\t"
" movw %%cx, 320(%%"REG_S")\n\t"
" shr $16, %%"REG_a"\n\t"
" shr $16, %%"REG_c"\n\t"
" movw %%ax, 64(%%"REG_D")\n\t"
" movw %%cx, 192(%%"REG_D")\n\t"

" movd 40(%%"REG_d"), %%mm3\n\t"
" movd 56(%%"REG_d"), %%mm4\n\t"
" movd 60(%%"REG_d"), %%mm0\n\t"
" movd 44(%%"REG_d"), %%mm2\n\t"
" movd 120(%%"REG_d"), %%mm5\n\t"
" punpckldq %%mm4, %%mm3\n\t"
" punpckldq 124(%%"REG_d"), %%mm0\n\t"
" pfadd 100(%%"REG_d"), %%mm5\n\t"
" punpckldq 36(%%"REG_d"), %%mm4\n\t"
" punpckldq 92(%%"REG_d"), %%mm2\n\t"
" movq %%mm5, %%mm6\n\t"
" pfadd %%mm4, %%mm3\n\t"
" pf2id %%mm0, %%mm1\n\t"
" pf2id %%mm3, %%mm3\n\t"
" packssdw %%mm1, %%mm1\n\t"
" packssdw %%mm3, %%mm3\n\t"
" pfadd 88(%%"REG_d"), %%mm5\n\t"
" movd %%mm1, %%"REG_a"\n\t"
" movd %%mm3, %%"REG_c"\n\t"
" movw %%ax, 448(%%"REG_D")\n\t"
" movw %%cx, 192(%%"REG_S")\n\t"
" pf2id %%mm5, %%mm5\n\t"
" packssdw %%mm5, %%mm5\n\t"
" shr $16, %%"REG_a"\n\t"
" shr $16, %%"REG_c"\n\t"
" movd %%mm5, %%"REG_b"\n\t"
" movw %%bx, 96(%%"REG_S")\n\t"
" movw %%ax, 480(%%"REG_D")\n\t"
" movw %%cx, 64(%%"REG_S")\n\t"
" pfadd %%mm2, %%mm0\n\t"
" pf2id %%mm0, %%mm0\n\t"
" packssdw %%mm0, %%mm0\n\t"
" movd %%mm0, %%"REG_a"\n\t"
" pfadd 68(%%"REG_d"), %%mm6\n\t"
" movw %%ax, 320(%%"REG_D")\n\t"
" shr $16, %%"REG_a"\n\t"
" pf2id %%mm6, %%mm6\n\t"
" packssdw %%mm6, %%mm6\n\t"
" movd %%mm6, %%"REG_b"\n\t"
" movw %%ax, 416(%%"REG_D")\n\t"
" movw %%bx, 32(%%"REG_S")\n\t"

" movq 96(%%"REG_d"), %%mm0\n\t"
" movq 112(%%"REG_d"), %%mm2\n\t"
" movq 104(%%"REG_d"), %%mm4\n\t"
" pfadd %%mm2, %%mm0\n\t"
" pfadd %%mm4, %%mm2\n\t"
" pfadd 120(%%"REG_d"), %%mm4\n\t"
" movq %%mm0, %%mm1\n\t"
" movq %%mm2, %%mm3\n\t"
" movq %%mm4, %%mm5\n\t"
" pfadd 64(%%"REG_d"), %%mm0\n\t"
" pfadd 80(%%"REG_d"), %%mm2\n\t"
" pfadd 72(%%"REG_d"), %%mm4\n\t"
" pf2id %%mm0, %%mm0\n\t"
" pf2id %%mm2, %%mm2\n\t"
" pf2id %%mm4, %%mm4\n\t"
" packssdw %%mm0, %%mm0\n\t"
" packssdw %%mm2, %%mm2\n\t"
" packssdw %%mm4, %%mm4\n\t"
" movd %%mm0, %%"REG_a"\n\t"
" movd %%mm2, %%"REG_c"\n\t"
" movd %%mm4, %%"REG_b"\n\t"
" movw %%ax, 480(%%"REG_S")\n\t"
" movw %%cx, 352(%%"REG_S")\n\t"
" movw %%bx, 224(%%"REG_S")\n\t"
" shr $16, %%"REG_a"\n\t"
" shr $16, %%"REG_c"\n\t"
" shr $16, %%"REG_b"\n\t"
" movw %%ax, 32(%%"REG_D")\n\t"
" movw %%cx, 160(%%"REG_D")\n\t"
" movw %%bx, 288(%%"REG_D")\n\t"
" pfadd 80(%%"REG_d"), %%mm1\n\t"
" pfadd 72(%%"REG_d"), %%mm3\n\t"
" pfadd 88(%%"REG_d"), %%mm5\n\t"
" pf2id %%mm1, %%mm1\n\t"
" pf2id %%mm3, %%mm3\n\t"
" pf2id %%mm5, %%mm5\n\t"
" packssdw %%mm1, %%mm1\n\t"
" packssdw %%mm3, %%mm3\n\t"
" packssdw %%mm5, %%mm5\n\t"
" movd %%mm1, %%"REG_a"\n\t"
" movd %%mm3, %%"REG_c"\n\t"
" movd %%mm5, %%"REG_b"\n\t"
" movw %%ax, 416(%%"REG_S")\n\t"
" movw %%cx, 288(%%"REG_S")\n\t"
" movw %%bx, 160(%%"REG_S")\n\t"
" shr $16, %%"REG_a"\n\t"
" shr $16, %%"REG_c"\n\t"
" shr $16, %%"REG_b"\n\t"
" movw %%ax, 96(%%"REG_D")\n\t"
" movw %%cx, 224(%%"REG_D")\n\t"
" movw %%bx, 352(%%"REG_D")\n\t"

/* Copies the 16-bit word at (REG_S) to (REG_D), i.e. from offset 0 of a
 * to offset 0 of b. */
" movsw\n\t"

/* NOTE(review): the only jump to .L_bye sits in the disabled block above.
 * A named label inside inline asm can clash if the compiler ever emits
 * this asm body more than once -- confirm before re-enabling that code. */
".L_bye:\n\t"
" femms\n\t"
	:
	:"m"(a),"m"(b),"m"(c),"m"(tmp[0])
	:"memory","%eax","%ebx","%ecx","%edx","%esi","%edi");
}