Mercurial > mplayer.hg
annotate mp3lib/dct36_3dnow.c @ 32474:6d06be50007f
make indentation consistent (no content changes)
author | siretart |
---|---|
date | Thu, 28 Oct 2010 08:12:26 +0000 |
parents | d0f70692a140 |
children |
rev | line source |
---|---|
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
1 /* |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
2 * dct36_3dnow.c - 3DNow! optimized dct36() |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
3 * |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
4 * This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
5 * <squash@mb.kcom.ne.jp>, only two types of changes have been made: |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
6 * |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
7 * - removed PREFETCH instruction for speedup |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
8 * - changed function name to support 3DNow! automatic detection |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
9 * |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
10 * You can find Kashiyama's original 3dnow! support patch |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
11 * (for mpg123-0.59o) at |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
12 * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
13 * |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
14 * by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999 |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
15 * <kim@comtec.co.jp> - after 1.Apr.1999 |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
16 * |
18783 | 17 * Modified for use with MPlayer, for details see the changelog at |
18 * http://svn.mplayerhq.hu/mplayer/trunk/ | |
15167
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
10322
diff
changeset
|
19 * $Id$ |
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
10322
diff
changeset
|
20 * |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
21 * Original disclaimer: |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
22 * The author of this program disclaim whole expressed or implied |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
23 * warranties with regard to this program, and in no event shall the |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
24 * author of this program liable to whatever resulted from the use of |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
25 * this program. Use it at your own risk. |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
26 * |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
27 * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
28 */ |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
29 |
28117
bd6833421e56
Consistently include config.h before mangle.h, fixes possible compilation
reimar
parents:
27757
diff
changeset
|
30 #include "config.h" |
16989 | 31 #include "mangle.h" |
30167
347d152a5cfa
Refactor real --> float #define to a typedef in a common header.
diego
parents:
28117
diff
changeset
|
32 #include "mpg123.h" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
33 #include "libavutil/x86_cpu.h" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
34 |
25325
7c7885350d89
Identifiers starting with __ are reserved for the system.
diego
parents:
18783
diff
changeset
|
35 #ifdef DCT36_OPTIMIZE_FOR_K7 |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
36 void dct36_3dnowex(real *inbuf, real *o1, |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
37 real *o2, real *wintab, real *tsbuf) |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
38 #else |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
39 void dct36_3dnow(real *inbuf, real *o1, |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
40 real *o2, real *wintab, real *tsbuf) |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
41 #endif |
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
42 { |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
25325
diff
changeset
|
43 __asm__ volatile( |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
44 "movq (%%"REG_a"),%%mm0\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
45 "movq 4(%%"REG_a"),%%mm1\n\t" |
30990 | 46 "pfadd %%mm1,%%mm0\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
47 "movq %%mm0,4(%%"REG_a")\n\t" |
30990 | 48 "psrlq $32,%%mm1\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
49 "movq 12(%%"REG_a"),%%mm2\n\t" |
30990 | 50 "punpckldq %%mm2,%%mm1\n\t" |
51 "pfadd %%mm2,%%mm1\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
52 "movq %%mm1,12(%%"REG_a")\n\t" |
30990 | 53 "psrlq $32,%%mm2\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
54 "movq 20(%%"REG_a"),%%mm3\n\t" |
30990 | 55 "punpckldq %%mm3,%%mm2\n\t" |
56 "pfadd %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
57 "movq %%mm2,20(%%"REG_a")\n\t" |
30990 | 58 "psrlq $32,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
59 "movq 28(%%"REG_a"),%%mm4\n\t" |
30990 | 60 "punpckldq %%mm4,%%mm3\n\t" |
61 "pfadd %%mm4,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
62 "movq %%mm3,28(%%"REG_a")\n\t" |
30990 | 63 "psrlq $32,%%mm4\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
64 "movq 36(%%"REG_a"),%%mm5\n\t" |
30990 | 65 "punpckldq %%mm5,%%mm4\n\t" |
66 "pfadd %%mm5,%%mm4\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
67 "movq %%mm4,36(%%"REG_a")\n\t" |
30990 | 68 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
69 "movq 44(%%"REG_a"),%%mm6\n\t" |
30990 | 70 "punpckldq %%mm6,%%mm5\n\t" |
71 "pfadd %%mm6,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
72 "movq %%mm5,44(%%"REG_a")\n\t" |
30990 | 73 "psrlq $32,%%mm6\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
74 "movq 52(%%"REG_a"),%%mm7\n\t" |
30990 | 75 "punpckldq %%mm7,%%mm6\n\t" |
76 "pfadd %%mm7,%%mm6\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
77 "movq %%mm6,52(%%"REG_a")\n\t" |
30990 | 78 "psrlq $32,%%mm7\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
79 "movq 60(%%"REG_a"),%%mm0\n\t" |
30990 | 80 "punpckldq %%mm0,%%mm7\n\t" |
81 "pfadd %%mm0,%%mm7\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
82 "movq %%mm7,60(%%"REG_a")\n\t" |
30990 | 83 "psrlq $32,%%mm0\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
84 "movd 68(%%"REG_a"),%%mm1\n\t" |
30990 | 85 "pfadd %%mm1,%%mm0\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
86 "movd %%mm0,68(%%"REG_a")\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
87 "movd 4(%%"REG_a"),%%mm0\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
88 "movd 12(%%"REG_a"),%%mm1\n\t" |
30990 | 89 "punpckldq %%mm1,%%mm0\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
90 "punpckldq 20(%%"REG_a"),%%mm1\n\t" |
30990 | 91 "pfadd %%mm1,%%mm0\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
92 "movd %%mm0,12(%%"REG_a")\n\t" |
30990 | 93 "psrlq $32,%%mm0\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
94 "movd %%mm0,20(%%"REG_a")\n\t" |
30990 | 95 "psrlq $32,%%mm1\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
96 "movd 28(%%"REG_a"),%%mm2\n\t" |
30990 | 97 "punpckldq %%mm2,%%mm1\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
98 "punpckldq 36(%%"REG_a"),%%mm2\n\t" |
30990 | 99 "pfadd %%mm2,%%mm1\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
100 "movd %%mm1,28(%%"REG_a")\n\t" |
30990 | 101 "psrlq $32,%%mm1\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
102 "movd %%mm1,36(%%"REG_a")\n\t" |
30990 | 103 "psrlq $32,%%mm2\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
104 "movd 44(%%"REG_a"),%%mm3\n\t" |
30990 | 105 "punpckldq %%mm3,%%mm2\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
106 "punpckldq 52(%%"REG_a"),%%mm3\n\t" |
30990 | 107 "pfadd %%mm3,%%mm2\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
108 "movd %%mm2,44(%%"REG_a")\n\t" |
30990 | 109 "psrlq $32,%%mm2\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
110 "movd %%mm2,52(%%"REG_a")\n\t" |
30990 | 111 "psrlq $32,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
112 "movd 60(%%"REG_a"),%%mm4\n\t" |
30990 | 113 "punpckldq %%mm4,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
114 "punpckldq 68(%%"REG_a"),%%mm4\n\t" |
30990 | 115 "pfadd %%mm4,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
116 "movd %%mm3,60(%%"REG_a")\n\t" |
30990 | 117 "psrlq $32,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
118 "movd %%mm3,68(%%"REG_a")\n\t" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
119 |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
120 "movq 24(%%"REG_a"),%%mm0\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
121 "movq 48(%%"REG_a"),%%mm1\n\t" |
30990 | 122 "movd "MANGLE(COS9)"+12,%%mm2\n\t" |
123 "punpckldq %%mm2,%%mm2\n\t" | |
124 "movd "MANGLE(COS9)"+24,%%mm3\n\t" | |
125 "punpckldq %%mm3,%%mm3\n\t" | |
126 "pfmul %%mm2,%%mm0\n\t" | |
127 "pfmul %%mm3,%%mm1\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
128 "push %%"REG_a"\n\t" |
30990 | 129 "movl $1,%%eax\n\t" |
130 "movd %%eax,%%mm7\n\t" | |
131 "pi2fd %%mm7,%%mm7\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
132 "pop %%"REG_a"\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
133 "movq 8(%%"REG_a"),%%mm2\n\t" |
30990 | 134 "movd "MANGLE(COS9)"+4,%%mm3\n\t" |
135 "punpckldq %%mm3,%%mm3\n\t" | |
136 "pfmul %%mm3,%%mm2\n\t" | |
137 "pfadd %%mm0,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
138 "movq 40(%%"REG_a"),%%mm3\n\t" |
30990 | 139 "movd "MANGLE(COS9)"+20,%%mm4\n\t" |
140 "punpckldq %%mm4,%%mm4\n\t" | |
141 "pfmul %%mm4,%%mm3\n\t" | |
142 "pfadd %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
143 "movq 56(%%"REG_a"),%%mm3\n\t" |
30990 | 144 "movd "MANGLE(COS9)"+28,%%mm4\n\t" |
145 "punpckldq %%mm4,%%mm4\n\t" | |
146 "pfmul %%mm4,%%mm3\n\t" | |
147 "pfadd %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
148 "movq (%%"REG_a"),%%mm3\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
149 "movq 16(%%"REG_a"),%%mm4\n\t" |
30990 | 150 "movd "MANGLE(COS9)"+8,%%mm5\n\t" |
151 "punpckldq %%mm5,%%mm5\n\t" | |
152 "pfmul %%mm5,%%mm4\n\t" | |
153 "pfadd %%mm4,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
154 "movq 32(%%"REG_a"),%%mm4\n\t" |
30990 | 155 "movd "MANGLE(COS9)"+16,%%mm5\n\t" |
156 "punpckldq %%mm5,%%mm5\n\t" | |
157 "pfmul %%mm5,%%mm4\n\t" | |
158 "pfadd %%mm4,%%mm3\n\t" | |
159 "pfadd %%mm1,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
160 "movq 64(%%"REG_a"),%%mm4\n\t" |
30990 | 161 "movd "MANGLE(COS9)"+32,%%mm5\n\t" |
162 "punpckldq %%mm5,%%mm5\n\t" | |
163 "pfmul %%mm5,%%mm4\n\t" | |
164 "pfadd %%mm4,%%mm3\n\t" | |
165 "movq %%mm2,%%mm4\n\t" | |
166 "pfadd %%mm3,%%mm4\n\t" | |
167 "movq %%mm7,%%mm5\n\t" | |
168 "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t" | |
169 "pfmul %%mm5,%%mm4\n\t" | |
170 "movq %%mm4,%%mm5\n\t" | |
171 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
172 "movd 108(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
173 "punpckldq 104(%%"REG_d"),%%mm6\n\t" |
30990 | 174 "pfmul %%mm6,%%mm5\n\t" |
25325
7c7885350d89
Identifiers starting with __ are reserved for the system.
diego
parents:
18783
diff
changeset
|
175 #ifdef DCT36_OPTIMIZE_FOR_K7 |
30990 | 176 "pswapd %%mm5,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
177 "movq %%mm5,32(%%"REG_c")\n\t" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
178 #else |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
179 "movd %%mm5,36(%%"REG_c")\n\t" |
30990 | 180 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
181 "movd %%mm5,32(%%"REG_c")\n\t" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
182 #endif |
30990 | 183 "movq %%mm4,%%mm6\n\t" |
184 "punpckldq %%mm6,%%mm5\n\t" | |
185 "pfsub %%mm6,%%mm5\n\t" | |
186 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
187 "movd 32(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
188 "punpckldq 36(%%"REG_d"),%%mm6\n\t" |
30990 | 189 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
190 "movd 32(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
191 "punpckldq 36(%%"REG_S"),%%mm6\n\t" |
30990 | 192 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
193 "movd %%mm5,1024(%%"REG_D")\n\t" |
30990 | 194 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
195 "movd %%mm5,1152(%%"REG_D")\n\t" |
30990 | 196 "movq %%mm3,%%mm4\n\t" |
197 "pfsub %%mm2,%%mm4\n\t" | |
198 "movq %%mm7,%%mm5\n\t" | |
199 "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t" | |
200 "pfmul %%mm5,%%mm4\n\t" | |
201 "movq %%mm4,%%mm5\n\t" | |
202 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
203 "movd 140(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
204 "punpckldq 72(%%"REG_d"),%%mm6\n\t" |
30990 | 205 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
206 "movd %%mm5,68(%%"REG_c")\n\t" |
30990 | 207 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
208 "movd %%mm5,0(%%"REG_c")\n\t" |
30990 | 209 "movq %%mm4,%%mm6\n\t" |
210 "punpckldq %%mm6,%%mm5\n\t" | |
211 "pfsub %%mm6,%%mm5\n\t" | |
212 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
213 "movd 0(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
214 "punpckldq 68(%%"REG_d"),%%mm6\n\t" |
30990 | 215 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
216 "movd 0(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
217 "punpckldq 68(%%"REG_S"),%%mm6\n\t" |
30990 | 218 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
219 "movd %%mm5,0(%%"REG_D")\n\t" |
30990 | 220 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
221 "movd %%mm5,2176(%%"REG_D")\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
222 "movq 8(%%"REG_a"),%%mm2\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
223 "movq 40(%%"REG_a"),%%mm3\n\t" |
30990 | 224 "pfsub %%mm3,%%mm2\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
225 "movq 56(%%"REG_a"),%%mm3\n\t" |
30990 | 226 "pfsub %%mm3,%%mm2\n\t" |
227 "movd "MANGLE(COS9)"+12,%%mm3\n\t" | |
228 "punpckldq %%mm3,%%mm3\n\t" | |
229 "pfmul %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
230 "movq 16(%%"REG_a"),%%mm3\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
231 "movq 32(%%"REG_a"),%%mm4\n\t" |
30990 | 232 "pfsub %%mm4,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
233 "movq 64(%%"REG_a"),%%mm4\n\t" |
30990 | 234 "pfsub %%mm4,%%mm3\n\t" |
235 "movd "MANGLE(COS9)"+24,%%mm4\n\t" | |
236 "punpckldq %%mm4,%%mm4\n\t" | |
237 "pfmul %%mm4,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
238 "movq 48(%%"REG_a"),%%mm4\n\t" |
30990 | 239 "pfsub %%mm4,%%mm3\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
240 "movq (%%"REG_a"),%%mm4\n\t" |
30990 | 241 "pfadd %%mm4,%%mm3\n\t" |
242 "movq %%mm2,%%mm4\n\t" | |
243 "pfadd %%mm3,%%mm4\n\t" | |
244 "movq %%mm7,%%mm5\n\t" | |
245 "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t" | |
246 "pfmul %%mm5,%%mm4\n\t" | |
247 "movq %%mm4,%%mm5\n\t" | |
248 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
249 "movd 112(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
250 "punpckldq 100(%%"REG_d"),%%mm6\n\t" |
30990 | 251 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
252 "movd %%mm5,40(%%"REG_c")\n\t" |
30990 | 253 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
254 "movd %%mm5,28(%%"REG_c")\n\t" |
30990 | 255 "movq %%mm4,%%mm6\n\t" |
256 "punpckldq %%mm6,%%mm5\n\t" | |
257 "pfsub %%mm6,%%mm5\n\t" | |
258 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
259 "movd 28(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
260 "punpckldq 40(%%"REG_d"),%%mm6\n\t" |
30990 | 261 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
262 "movd 28(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
263 "punpckldq 40(%%"REG_S"),%%mm6\n\t" |
30990 | 264 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
265 "movd %%mm5,896(%%"REG_D")\n\t" |
30990 | 266 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
267 "movd %%mm5,1280(%%"REG_D")\n\t" |
30990 | 268 "movq %%mm3,%%mm4\n\t" |
269 "pfsub %%mm2,%%mm4\n\t" | |
270 "movq %%mm7,%%mm5\n\t" | |
271 "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t" | |
272 "pfmul %%mm5,%%mm4\n\t" | |
273 "movq %%mm4,%%mm5\n\t" | |
274 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
275 "movd 136(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
276 "punpckldq 76(%%"REG_d"),%%mm6\n\t" |
30990 | 277 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
278 "movd %%mm5,64(%%"REG_c")\n\t" |
30990 | 279 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
280 "movd %%mm5,4(%%"REG_c")\n\t" |
30990 | 281 "movq %%mm4,%%mm6\n\t" |
282 "punpckldq %%mm6,%%mm5\n\t" | |
283 "pfsub %%mm6,%%mm5\n\t" | |
284 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
285 "movd 4(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
286 "punpckldq 64(%%"REG_d"),%%mm6\n\t" |
30990 | 287 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
288 "movd 4(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
289 "punpckldq 64(%%"REG_S"),%%mm6\n\t" |
30990 | 290 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
291 "movd %%mm5,128(%%"REG_D")\n\t" |
30990 | 292 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
293 "movd %%mm5,2048(%%"REG_D")\n\t" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
294 |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
295 "movq 8(%%"REG_a"),%%mm2\n\t" |
30990 | 296 "movd "MANGLE(COS9)"+20,%%mm3\n\t" |
297 "punpckldq %%mm3,%%mm3\n\t" | |
298 "pfmul %%mm3,%%mm2\n\t" | |
299 "pfsub %%mm0,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
300 "movq 40(%%"REG_a"),%%mm3\n\t" |
30990 | 301 "movd "MANGLE(COS9)"+28,%%mm4\n\t" |
302 "punpckldq %%mm4,%%mm4\n\t" | |
303 "pfmul %%mm4,%%mm3\n\t" | |
304 "pfsub %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
305 "movq 56(%%"REG_a"),%%mm3\n\t" |
30990 | 306 "movd "MANGLE(COS9)"+4,%%mm4\n\t" |
307 "punpckldq %%mm4,%%mm4\n\t" | |
308 "pfmul %%mm4,%%mm3\n\t" | |
309 "pfadd %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
310 "movq (%%"REG_a"),%%mm3\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
311 "movq 16(%%"REG_a"),%%mm4\n\t" |
30990 | 312 "movd "MANGLE(COS9)"+32,%%mm5\n\t" |
313 "punpckldq %%mm5,%%mm5\n\t" | |
314 "pfmul %%mm5,%%mm4\n\t" | |
315 "pfsub %%mm4,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
316 "movq 32(%%"REG_a"),%%mm4\n\t" |
30990 | 317 "movd "MANGLE(COS9)"+8,%%mm5\n\t" |
318 "punpckldq %%mm5,%%mm5\n\t" | |
319 "pfmul %%mm5,%%mm4\n\t" | |
320 "pfsub %%mm4,%%mm3\n\t" | |
321 "pfadd %%mm1,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
322 "movq 64(%%"REG_a"),%%mm4\n\t" |
30990 | 323 "movd "MANGLE(COS9)"+16,%%mm5\n\t" |
324 "punpckldq %%mm5,%%mm5\n\t" | |
325 "pfmul %%mm5,%%mm4\n\t" | |
326 "pfadd %%mm4,%%mm3\n\t" | |
327 "movq %%mm2,%%mm4\n\t" | |
328 "pfadd %%mm3,%%mm4\n\t" | |
329 "movq %%mm7,%%mm5\n\t" | |
330 "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t" | |
331 "pfmul %%mm5,%%mm4\n\t" | |
332 "movq %%mm4,%%mm5\n\t" | |
333 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
334 "movd 116(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
335 "punpckldq 96(%%"REG_d"),%%mm6\n\t" |
30990 | 336 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
337 "movd %%mm5,44(%%"REG_c")\n\t" |
30990 | 338 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
339 "movd %%mm5,24(%%"REG_c")\n\t" |
30990 | 340 "movq %%mm4,%%mm6\n\t" |
341 "punpckldq %%mm6,%%mm5\n\t" | |
342 "pfsub %%mm6,%%mm5\n\t" | |
343 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
344 "movd 24(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
345 "punpckldq 44(%%"REG_d"),%%mm6\n\t" |
30990 | 346 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
347 "movd 24(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
348 "punpckldq 44(%%"REG_S"),%%mm6\n\t" |
30990 | 349 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
350 "movd %%mm5,768(%%"REG_D")\n\t" |
30990 | 351 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
352 "movd %%mm5,1408(%%"REG_D")\n\t" |
30990 | 353 "movq %%mm3,%%mm4\n\t" |
354 "pfsub %%mm2,%%mm4\n\t" | |
355 "movq %%mm7,%%mm5\n\t" | |
356 "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t" | |
357 "pfmul %%mm5,%%mm4\n\t" | |
358 "movq %%mm4,%%mm5\n\t" | |
359 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
360 "movd 132(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
361 "punpckldq 80(%%"REG_d"),%%mm6\n\t" |
30990 | 362 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
363 "movd %%mm5,60(%%"REG_c")\n\t" |
30990 | 364 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
365 "movd %%mm5,8(%%"REG_c")\n\t" |
30990 | 366 "movq %%mm4,%%mm6\n\t" |
367 "punpckldq %%mm6,%%mm5\n\t" | |
368 "pfsub %%mm6,%%mm5\n\t" | |
369 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
370 "movd 8(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
371 "punpckldq 60(%%"REG_d"),%%mm6\n\t" |
30990 | 372 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
373 "movd 8(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
374 "punpckldq 60(%%"REG_S"),%%mm6\n\t" |
30990 | 375 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
376 "movd %%mm5,256(%%"REG_D")\n\t" |
30990 | 377 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
378 "movd %%mm5,1920(%%"REG_D")\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
379 "movq 8(%%"REG_a"),%%mm2\n\t" |
30990 | 380 "movd "MANGLE(COS9)"+28,%%mm3\n\t" |
381 "punpckldq %%mm3,%%mm3\n\t" | |
382 "pfmul %%mm3,%%mm2\n\t" | |
383 "pfsub %%mm0,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
384 "movq 40(%%"REG_a"),%%mm3\n\t" |
30990 | 385 "movd "MANGLE(COS9)"+4,%%mm4\n\t" |
386 "punpckldq %%mm4,%%mm4\n\t" | |
387 "pfmul %%mm4,%%mm3\n\t" | |
388 "pfadd %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
389 "movq 56(%%"REG_a"),%%mm3\n\t" |
30990 | 390 "movd "MANGLE(COS9)"+20,%%mm4\n\t" |
391 "punpckldq %%mm4,%%mm4\n\t" | |
392 "pfmul %%mm4,%%mm3\n\t" | |
393 "pfsub %%mm3,%%mm2\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
394 "movq (%%"REG_a"),%%mm3\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
395 "movq 16(%%"REG_a"),%%mm4\n\t" |
30990 | 396 "movd "MANGLE(COS9)"+16,%%mm5\n\t" |
397 "punpckldq %%mm5,%%mm5\n\t" | |
398 "pfmul %%mm5,%%mm4\n\t" | |
399 "pfsub %%mm4,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
400 "movq 32(%%"REG_a"),%%mm4\n\t" |
30990 | 401 "movd "MANGLE(COS9)"+32,%%mm5\n\t" |
402 "punpckldq %%mm5,%%mm5\n\t" | |
403 "pfmul %%mm5,%%mm4\n\t" | |
404 "pfadd %%mm4,%%mm3\n\t" | |
405 "pfadd %%mm1,%%mm3\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
406 "movq 64(%%"REG_a"),%%mm4\n\t" |
30990 | 407 "movd "MANGLE(COS9)"+8,%%mm5\n\t" |
408 "punpckldq %%mm5,%%mm5\n\t" | |
409 "pfmul %%mm5,%%mm4\n\t" | |
410 "pfsub %%mm4,%%mm3\n\t" | |
411 "movq %%mm2,%%mm4\n\t" | |
412 "pfadd %%mm3,%%mm4\n\t" | |
413 "movq %%mm7,%%mm5\n\t" | |
414 "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t" | |
415 "pfmul %%mm5,%%mm4\n\t" | |
416 "movq %%mm4,%%mm5\n\t" | |
417 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
418 "movd 120(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
419 "punpckldq 92(%%"REG_d"),%%mm6\n\t" |
30990 | 420 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
421 "movd %%mm5,48(%%"REG_c")\n\t" |
30990 | 422 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
423 "movd %%mm5,20(%%"REG_c")\n\t" |
30990 | 424 "movq %%mm4,%%mm6\n\t" |
425 "punpckldq %%mm6,%%mm5\n\t" | |
426 "pfsub %%mm6,%%mm5\n\t" | |
427 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
428 "movd 20(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
429 "punpckldq 48(%%"REG_d"),%%mm6\n\t" |
30990 | 430 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
431 "movd 20(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
432 "punpckldq 48(%%"REG_S"),%%mm6\n\t" |
30990 | 433 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
434 "movd %%mm5,640(%%"REG_D")\n\t" |
30990 | 435 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
436 "movd %%mm5,1536(%%"REG_D")\n\t" |
30990 | 437 "movq %%mm3,%%mm4\n\t" |
438 "pfsub %%mm2,%%mm4\n\t" | |
439 "movq %%mm7,%%mm5\n\t" | |
440 "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t" | |
441 "pfmul %%mm5,%%mm4\n\t" | |
442 "movq %%mm4,%%mm5\n\t" | |
443 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
444 "movd 128(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
445 "punpckldq 84(%%"REG_d"),%%mm6\n\t" |
30990 | 446 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
447 "movd %%mm5,56(%%"REG_c")\n\t" |
30990 | 448 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
449 "movd %%mm5,12(%%"REG_c")\n\t" |
30990 | 450 "movq %%mm4,%%mm6\n\t" |
451 "punpckldq %%mm6,%%mm5\n\t" | |
452 "pfsub %%mm6,%%mm5\n\t" | |
453 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
454 "movd 12(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
455 "punpckldq 56(%%"REG_d"),%%mm6\n\t" |
30990 | 456 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
457 "movd 12(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
458 "punpckldq 56(%%"REG_S"),%%mm6\n\t" |
30990 | 459 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
460 "movd %%mm5,384(%%"REG_D")\n\t" |
30990 | 461 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
462 "movd %%mm5,1792(%%"REG_D")\n\t" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
463 |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
464 "movq (%%"REG_a"),%%mm4\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
465 "movq 16(%%"REG_a"),%%mm3\n\t" |
30990 | 466 "pfsub %%mm3,%%mm4\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
467 "movq 32(%%"REG_a"),%%mm3\n\t" |
30990 | 468 "pfadd %%mm3,%%mm4\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
469 "movq 48(%%"REG_a"),%%mm3\n\t" |
30990 | 470 "pfsub %%mm3,%%mm4\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
471 "movq 64(%%"REG_a"),%%mm3\n\t" |
30990 | 472 "pfadd %%mm3,%%mm4\n\t" |
473 "movq %%mm7,%%mm5\n\t" | |
474 "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t" | |
475 "pfmul %%mm5,%%mm4\n\t" | |
476 "movq %%mm4,%%mm5\n\t" | |
477 "pfacc %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
478 "movd 124(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
479 "punpckldq 88(%%"REG_d"),%%mm6\n\t" |
30990 | 480 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
481 "movd %%mm5,52(%%"REG_c")\n\t" |
30990 | 482 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
483 "movd %%mm5,16(%%"REG_c")\n\t" |
30990 | 484 "movq %%mm4,%%mm6\n\t" |
485 "punpckldq %%mm6,%%mm5\n\t" | |
486 "pfsub %%mm6,%%mm5\n\t" | |
487 "punpckhdq %%mm5,%%mm5\n\t" | |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
488 "movd 16(%%"REG_d"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
489 "punpckldq 52(%%"REG_d"),%%mm6\n\t" |
30990 | 490 "pfmul %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
491 "movd 16(%%"REG_S"),%%mm6\n\t" |
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
492 "punpckldq 52(%%"REG_S"),%%mm6\n\t" |
30990 | 493 "pfadd %%mm6,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
494 "movd %%mm5,512(%%"REG_D")\n\t" |
30990 | 495 "psrlq $32,%%mm5\n\t" |
31215
d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
reimar
parents:
31206
diff
changeset
|
496 "movd %%mm5,1664(%%"REG_D")\n\t" |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
497 |
30990 | 498 "femms\n\t" |
499 : | |
31206 | 500 : "a" (inbuf), "S" (o1), "c" (o2), "d" (wintab), "D" (tsbuf) |
30990 | 501 : "memory"); |
10322
9163bdb578a6
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
alex
parents:
diff
changeset
|
502 } |