Mercurial > mplayer.hg
comparison liba52/srfftp_3dnow.h @ 885:cec1562ccf8a
Improvements
author | nickols_k |
---|---|
date | Sat, 26 May 2001 10:29:41 +0000 |
parents | |
children | cb432deedb92 |
comparison
equal
deleted
inserted
replaced
884:83b5359aee63 | 885:cec1562ccf8a |
---|---|
1 /* | |
2 * srfftp.h | |
3 * | |
4 * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 | |
5 * | |
6 * 64 and 128 point split radix fft for ac3dec | |
7 * | |
8 * The algorithm is described in the book: | |
9 * "Computational Frameworks of the Fast Fourier Transform". | |
10 * | |
11 * The ideas and the organization of the code are borrowed from djbfft written by | |
12 * D. J. Bernstein <djb@cr.yp.to>. djbfft can be found at | |
13 * http://cr.yp.to/djbfft.html. | |
14 * | |
15 * srfftp.h is free software; you can redistribute it and/or modify | |
16 * it under the terms of the GNU General Public License as published by | |
17 * the Free Software Foundation; either version 2, or (at your option) | |
18 * any later version. | |
19 * | |
20 * srfftp.h is distributed in the hope that it will be useful, | |
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 * GNU General Public License for more details. | |
24 * | |
25 * You should have received a copy of the GNU General Public License | |
26 * along with GNU Make; see the file COPYING. If not, write to | |
27 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | |
28 * | |
29 * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations | |
30 * by Nick Kurshev <nickols_k@mail.ru> | |
31 */ | |
32 | |
33 #ifndef SRFFTP_3DNOW_H__ | |
34 #define SRFFTP_3DNOW_H__ | |
35 | |
/*
 * TRANS_FILL_MM6_MM7_3DNOW()
 *
 * Prepares the two packed-float sign multipliers that TRANSZERO_3DNOW feeds
 * to pfmul:
 *     mm7 = { low: -1.0, high:  1.0 }
 *     mm6 = { low:  1.0, high: -1.0 }
 * eax is used as scratch and listed as clobbered.  mm6/mm7 are deliberately
 * left loaded for the asm statement that follows the macro invocation.
 * The expansion already ends in ';', so the macro is invoked without one.
 * An asm statement without outputs is implicitly volatile; the keyword is
 * only spelled out here.
 */
#ifdef HAVE_3DNOWEX
/* Extended 3DNow! build: convert mm7 once, then pswapd yields mm6. */
#define TRANS_FILL_MM6_MM7_3DNOW()\
    asm volatile(\
        "movl	$-1, %%eax\n\t"          /* eax = -1 */\
        "movd	%%eax, %%mm7\n\t"        /* mm7.low = -1 */\
        "negl	%%eax\n\t"               /* eax = 1 */\
        "movd	%%eax, %%mm6\n\t"        /* mm6.low = 1 */\
        "punpckldq %%mm6, %%mm7\n\t" /* -1.0 | 1.0 */\
        "pi2fd	%%mm7, %%mm7\n\t"        /* int32 pair -> float pair */\
        "pswapd	%%mm7, %%mm6\n\t"/* 1.0 | -1.0 */\
        :::"eax","memory");
#else
/* Plain 3DNow! build: assemble both integer pairs, then convert each. */
#define TRANS_FILL_MM6_MM7_3DNOW()\
    asm volatile(\
        "movl	$-1, %%eax\n\t"          /* eax = -1 */\
        "movd	%%eax, %%mm7\n\t"        /* mm7.low = -1 */\
        "negl	%%eax\n\t"               /* eax = 1 */\
        "movd	%%eax, %%mm6\n\t"        /* mm6.low = 1 */\
        "punpckldq %%mm6, %%mm7\n\t" /* -1.0 | 1.0 */\
        "punpckldq %%mm7, %%mm6\n\t" /* 1.0 | -1.0 */\
        "pi2fd	%%mm7, %%mm7\n\t"        /* int32 pair -> float pair */\
        "pi2fd	%%mm6, %%mm6\n\t"        /* int32 pair -> float pair */\
        :::"eax","memory");
#endif
60 | |
/*
 * PSWAP_MM(mm_base, mm_hlp)
 *
 * Expands to an asm-template fragment (a string literal) that swaps the low
 * and high 32-bit halves of MMX register mm_base.  Both arguments must be
 * string literals naming a register, e.g. PSWAP_MM("%%mm4","%%mm2").
 *
 *   HAVE_3DNOWEX : a single pswapd; mm_hlp is unused.
 *   otherwise    : emulated with movq/psrlq/punpckldq; mm_hlp is overwritten
 *                  with a copy of the original mm_base.
 *
 * The pieces are joined by ordinary adjacent string-literal concatenation.
 * Pasting string literals with ## does not produce a valid preprocessing
 * token and is rejected by current compilers.
 */
#ifdef HAVE_3DNOWEX
#define PSWAP_MM(mm_base,mm_hlp) "pswapd " mm_base "," mm_base " \n\t"
#else
#define PSWAP_MM(mm_base,mm_hlp)\
    "movq " mm_base "," mm_hlp " \n\t"      /* hlp  = { lo, hi } */\
    "psrlq $32, " mm_base "\n\t"            /* base = { hi, 0 }  */\
    "punpckldq " mm_hlp "," mm_base "\n\t"  /* base = { hi, lo } */
#endif
69 | |
/*
 * TRANSZERO_3DNOW(A0, A4, A8, A12)
 *
 * 3DNow! butterfly on four lvalues, each accessed as one 64-bit quantity
 * holding two packed floats (re in the low half, im in the high half,
 * according to the per-instruction annotations below).  It also reads
 * wTB[0] and wTB[k*2]; `wTB` and `k` must be in scope at the call site.
 *
 *     u    = wTB[0] + wTB[k*2]
 *     v.re = wTB[k*2].im - wTB[0].im
 *     v.im = wTB[0].re   - wTB[k*2].re
 *     A8   = A0 - u;   A0  = A0 + u
 *     A12  = A4 + v;   A4  = A4 - v
 *
 * Uses mm0-mm7 and eax (via TRANS_FILL_MM6_MM7_3DNOW).  femms is issued on
 * entry and on exit.
 *
 * The main asm is volatile because it relies on mm6/mm7 having been loaded
 * by the preceding asm statement, a dependency the compiler cannot see.
 * A0 and A4 are read-modify-write, so they are "+m" operands rather than
 * "=m" outputs tied to matching-constraint inputs.
 */
#define TRANSZERO_3DNOW(A0,A4,A8,A12) \
{ \
    asm volatile("femms":::"memory");\
    TRANS_FILL_MM6_MM7_3DNOW()\
    asm volatile(\
        "movq	%4, %%mm0\n\t" /* mm0 = wTB[0]*/\
        "movq	%5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
        "movq	%%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\
        "pfadd	%%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\
        "pfmul	%%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\
        "pfmul	%%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\
        "pfadd	%%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\
        "movq	%%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
        PSWAP_MM("%%mm4","%%mm2")/* halves swapped: mm4 = v*/\
        "movq	%0, %%mm0\n\t" /* a1 = A0;*/\
        "movq	%%mm0, %%mm1\n\t"\
        "pfadd	%%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
        "pfsub	%%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\
        "movq	%%mm0, %0\n\t"\
        "movq	%%mm1, %1\n\t"\
        "movq	%2, %%mm2\n\t" /* a1 = A4;*/\
        "movq	%%mm2, %%mm3\n\t"\
        "pfadd	%%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
        "pfsub	%%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
        "movq	%%mm2, %3\n\t"\
        "movq	%%mm3, %2"\
        :"+m"(A0), "=m"(A8), "+m"(A4), "=m"(A12)\
        :"m"(wTB[0]), "m"(wTB[k*2])\
        :"memory");\
    asm volatile("femms":::"memory");\
}
101 | |
102 #endif |