Mercurial > mplayer.hg
annotate liba52/srfftp_3dnow.h @ 28992:947ef23ba798
Test if create_vdp_decoder() might succeed by calling it from config()
with a small value for max_reference_frames.
This does not make automatic recovery by using software decoder possible,
but lets MPlayer fail more gracefully on - actually existing - buggy
hardware that does not support certain H264 widths when using
hardware accelerated decoding (784, 864, 944, 1024, 1808, 1888 pixels on
NVIDIA G98) and if the user tries to hardware-decode more samples at
the same time than supported.
Might break playback of H264 Intra-Only samples on hardware with very
little video memory.
author | cehoyos |
---|---|
date | Sat, 21 Mar 2009 20:11:05 +0000 |
parents | 31287e75b5d8 |
children | e83eef58b30a |
rev | line source |
---|---|
885 | 1 /* |
2 * srfftp.h | |
3 * | |
4 * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 | |
5 * | |
6 * 64 and 128 point split radix fft for ac3dec | |
7 * | |
8 * The algorithm is described in the book: | |
9 * "Computational Frameworks of the Fast Fourier Transform". | |
10 * | |
11 * The ideas and the organization of code borrowed from djbfft written by | |
12 * D. J. Bernstein <djb@cr.yp.to>. djbfft can be found at | |
13 * http://cr.yp.to/djbfft.html. | |
14 * | |
15 * srfftp.h is free software; you can redistribute it and/or modify | |
16 * it under the terms of the GNU General Public License as published by | |
17 * the Free Software Foundation; either version 2, or (at your option) | |
18 * any later version. | |
19 * | |
20 * srfftp.h is distributed in the hope that it will be useful, | |
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 * GNU General Public License for more details. | |
24 * | |
25 * You should have received a copy of the GNU General Public License | |
26 * along with GNU Make; see the file COPYING. If not, write to | |
27 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | |
28 * | |
29 * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations | |
30 * by Nick Kurshev <nickols_k@mail.ru> | |
31 */ | |
32 | |
33 #ifndef SRFFTP_3DNOW_H__ | |
34 #define SRFFTP_3DNOW_H__ | |
35 | |
/* Two unsigned long words, accessed as val[0] and val[1].
 * NOTE(review): presumably the type of the constants that
 * TRANS_FILL_MM6_MM7_3DNOW() loads into mm6/mm7; their declarations are not
 * visible in this header - confirm against the including source file. */
1173 | 36 typedef struct
37 { | |
38 unsigned long val[2]; | |
39 }i_cmplx_t; | |
886 | 40 |
/* Loads x_plus_minus_3dnow into mm6 and x_minus_plus_3dnow into mm7; both
 * names must be visible at the point of expansion.
 * The TRANS*_3DNOW macros in this file pxor against mm6/mm7 without loading
 * them, so this macro has to be expanded before they are used.
 * NOTE(review): judging by the inline comments in TRANSZERO_3DNOW, mm6 flips
 * the sign of the upper (.im) float and mm7 the sign of the lower (.re)
 * float - confirm against the definitions of the two constants. */
885 | 41 #define TRANS_FILL_MM6_MM7_3DNOW()\ |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
1173
diff
changeset
|
42 __asm__ volatile(\ |
1173 | 43 "movq %1, %%mm7\n\t"/* mm7 = x_minus_plus_3dnow */\ |
44 "movq %0, %%mm6\n\t"/* mm6 = x_plus_minus_3dnow */\ | |
45 ::"m"(x_plus_minus_3dnow),\ | |
46 "m"(x_minus_plus_3dnow)\ | |
47 :"memory"); | |
885 | 48 |
/* PSWAP_MM(mm_base, mm_hlp): asm text fragment that exchanges the low and
 * high 32-bit halves of mm_base. Both arguments are string literals naming
 * MMX registers (e.g. "%%mm4").
 * With HAVE_AMD3DNOWEXT a single pswapd is emitted and mm_hlp is not touched.
 * Otherwise the swap is emulated: mm_hlp receives a copy of mm_base, mm_base
 * is shifted right by 32 bits and the saved low half is unpacked into its
 * upper half - so mm_hlp is overwritten in this variant. */
28335 | 49 #if HAVE_AMD3DNOWEXT
1116 | 50 #define PSWAP_MM(mm_base,mm_hlp) "pswapd "mm_base","mm_base"\n\t"
885 | 51 #else
52 #define PSWAP_MM(mm_base,mm_hlp)\ | |
1116 | 53 "movq "mm_base","mm_hlp"\n\t"\
54 "psrlq $32, "mm_base"\n\t"\ | |
55 "punpckldq "mm_hlp","mm_base"\n\t" | |
885 | 56 #endif
/* PFNACC_MM(mm_base, mm_hlp): asm text fragment that leaves (low - high) of
 * mm_base in the low 32-bit float of mm_base. Both arguments are string
 * literals naming MMX registers.
 * With HAVE_AMD3DNOWEXT a single pfnacc with identical operands is emitted
 * and mm_hlp is not touched. Otherwise mm_hlp is filled with the high float
 * of mm_base in both halves and subtracted from mm_base, so mm_hlp is
 * overwritten.
 * Only the low half is the same in both variants: the emulation computes
 * high - high in the upper half, whereas pfnacc puts low - high there too.
 * Callers should therefore rely on the low half only. */
28335 | 57 #if HAVE_AMD3DNOWEXT
1116 | 58 #define PFNACC_MM(mm_base,mm_hlp) "pfnacc "mm_base","mm_base"\n\t"
920 | 59 #else
60 #define PFNACC_MM(mm_base,mm_hlp)\ | |
1116 | 61 "movq "mm_base","mm_hlp"\n\t"\
62 "psrlq $32,"mm_hlp"\n\t"\ | |
63 "punpckldq "mm_hlp","mm_hlp"\n\t"\ | |
64 "pfsub "mm_hlp","mm_base"\n\t" | |
920 | 65 #endif
885 | 66 |
/* TRANSZERO_3DNOW(A0,A4,A8,A12): butterfly over four 64-bit memory operands.
 * Reads wTB[0] and wTB[k*2] (wTB and k come from the expansion scope) and
 * expects mm6/mm7 to have been loaded beforehand (they are only pxor'ed here).
 * u (mm5) = wTB[0] + wTB[k*2]; v (mm4) is built from the sign-flipped inputs
 * and then half-swapped with PSWAP_MM.
 * Results, using the values of A0 and A4 read before any store:
 *   A0  <- A0 + u      A8 <- A0 - u
 *   A12 <- A4 + v      A4 <- A4 - v
 * Overwrites mm0-mm5 without listing them as clobbers. */
67 #define TRANSZERO_3DNOW(A0,A4,A8,A12) \ | |
68 { \ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
1173
diff
changeset
|
69 __asm__ volatile(\ |
885 | 70 "movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\ |
71 "movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \ | |
72 "movq %%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\ | |
73 "pfadd %%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\ | |
1173 | 74 "pxor %%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\ |
75 "pxor %%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\ | |
885 | 76 "pfadd %%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\ |
77 "movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\ | |
78 PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\ | |
79 "movq %6, %%mm0\n\t" /* a1 = A0;*/\ | |
886 | 80 "movq %7, %%mm2\n\t" /* a1 = A4;*/\ |
885 | 81 "movq %%mm0, %%mm1\n\t"\ |
886 | 82 "movq %%mm2, %%mm3\n\t"\ |
885 | 83 "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\ |
886 | 84 "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\ |
1053 | 85 "movq %%mm0, %0\n\t"\ |
885 | 86 "pfsub %%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\ |
1053 | 87 "movq %%mm2, %3\n\t"\ |
886 | 88 "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\ |
885 | 89 "movq %%mm1, %1\n\t"\ |
90 "movq %%mm3, %2"\ | |
91 :"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\ | |
1068
cda8756ac10f
Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents:
1053
diff
changeset
|
92 :"m"(wTB[0]), "m"(wTB[k*2]), "m"(A0), "m"(A4)\ |
885 | 93 :"memory");\ |
94 } | |
95 | |
/* TRANSHALF_16_3DNOW(A2,A6,A10,A14): butterfly over four 64-bit memory
 * operands. Reads wTB[2], wTB[6] and HSQRT2_3DNOW from the expansion scope
 * and expects mm6/mm7 to have been loaded beforehand (only pxor'ed here).
 * u (mm0) and v (mm3) are built from wTB[2] and wTB[6] and both multiplied by
 * HSQRT2_3DNOW.
 * Results, using the values of A2 and A6 read before any store:
 *   A2 <- A2 + u                 A10 <- A2 - u
 *   A6 <- A6 + (v xor mm6)       A14 <- A6 + (v xor mm7)
 * Overwrites mm0-mm5 without listing them as clobbers. */
886 | 96 #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\ |
97 {\ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
1173
diff
changeset
|
98 __asm__ volatile(\ |
886 | 99 "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\ |
100 "movq %%mm0, %%mm1\n\t"\ | |
1173 | 101 "pxor %%mm7, %%mm1\n\t"\ |
886 | 102 "pfacc %%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\ |
103 "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\ | |
104 "movq %%mm1, %%mm2\n\t"\ | |
1173 | 105 "pxor %%mm7, %%mm1\n\t"\ |
886 | 106 "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\ |
107 "movq %%mm1, %%mm2\n\t"\ | |
1173 | 108 "pxor %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\ |
886 | 109 "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\ |
110 "pfadd %%mm2, %%mm3\n\t"\ | |
111 PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\ | |
1173 | 112 "pxor %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\ |
886 | 113 "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\ |
890 | 114 "movq %8, %%mm2\n\t"\ |
886 | 115 "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\ |
116 "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\ | |
117 "movq %6, %%mm1\n\t" /* a1 = A2;*/\ | |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
118 "movq %7, %%mm5\n\t" /* a1 = A6;*/\ |
886 | 119 "movq %%mm1, %%mm2\n\t"\ |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
120 "movq %%mm3, %%mm4\n\t"\ |
886 | 121 "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\ |
1173 | 122 "pxor %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\ |
886 | 123 "pfsub %%mm0, %%mm2\n\t" /*A10 = a1 - u;*/\ |
1173 | 124 "pxor %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\ |
886 | 125 "movq %%mm1, %0\n\t"\ |
126 "movq %%mm2, %1\n\t"\ | |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
127 "movq %%mm5, %%mm2\n\t"\ |
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
128 "pfadd %%mm4, %%mm5\n\t"/*A6.im = a1.im - v.im;*/\ |
886 | 129 "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\ |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
130 "movq %%mm5, %2\n\t"\ |
886 | 131 "movq %%mm2, %3"\ |
132 :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\ | |
1068
cda8756ac10f
Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents:
1053
diff
changeset
|
133 :"m"(wTB[2]), "m"(wTB[6]), "m"(A2), "m"(A6), "m"(HSQRT2_3DNOW)\ |
886 | 134 :"memory");\ |
920 | 135 } |
136 | |
/* TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3): general butterfly, emitted as two
 * consecutive asm statements. Expects mm6/mm7 to have been loaded beforehand
 * (they are only pxor'ed here).
 * Statement 1 reads WT, D, WB and D3 and leaves u in mm4 and v in mm5; it
 * writes nothing to memory.
 * Statement 2 combines them with the values of A1 and A5 read at its start:
 *   A1 <- A1 + u      A9  <- A1 - u
 *   A5 <- A5 - v      A13 <- A5 + v
 * NOTE(review): statement 2 depends on mm4/mm5 still holding u/v from
 * statement 1, yet neither statement names any MMX register as an operand or
 * clobber. This works only as long as the compiler emits nothing that touches
 * MMX registers between the two statements. */
137 #define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\ | |
138 { \ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
1173
diff
changeset
|
139 __asm__ volatile(\ |
920 | 140 "movq %1, %%mm4\n\t"\ |
141 "movq %%mm4, %%mm5\n\t"\ | |
142 "punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\ | |
143 "punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\ | |
144 "movq %0, %%mm0\n\t"\ | |
145 "pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\ | |
146 "pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\ | |
147 PSWAP_MM("%%mm5","%%mm3")\ | |
1173 | 148 "pxor %%mm7, %%mm5\n\t"\ |
920 | 149 "pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\ |
150 "movq %3, %%mm1\n\t"\ | |
151 "movq %2, %%mm0\n\t"\ | |
152 PSWAP_MM("%%mm1","%%mm3")\ | |
153 "movq %%mm0, %%mm2\n\t"\ | |
154 "pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\ | |
155 "pfmul %3, %%mm2\n\t"/* mm2 = v*/\ | |
156 PFNACC_MM("%%mm2","%%mm3")\ | |
157 "pfacc %%mm0, %%mm0\n\t"\ | |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
158 "movq %%mm4, %%mm5\n\t"\ |
920 | 159 "punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\ |
1173 | 160 "pxor %%mm6, %%mm5\n\t"\ |
920 | 161 "movq %%mm2, %%mm3\n\t"\ |
1173 | 162 "pxor %%mm7, %%mm3\n\t"\ |
920 | 163 "pfadd %%mm3, %%mm5\n\t"\ |
164 PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\ | |
165 "pfadd %%mm2, %%mm4\n\t"\ | |
166 :\ | |
167 :"m"(WT), "m"(D), "m"(WB), "m"(D3)\ | |
168 :"memory");\ | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
1173
diff
changeset
|
169 __asm__ volatile(\ |
920 | 170 "movq %4, %%mm0\n\t"/* a1 = A1*/\ |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
171 "movq %5, %%mm2\n\t"/* a1 = A5*/\ |
920 | 172 "movq %%mm0, %%mm1\n\t"\ |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
173 "movq %%mm2, %%mm3\n\t"\ |
920 | 174 "pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\ |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
175 "pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\ |
1053 | 176 "movq %%mm0, %0\n\t"\ |
920 | 177 "pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\ |
1053 | 178 "movq %%mm2, %2\n\t"\ |
926
d44a690543ac
Better insns scheduling and moving out local variables
nickols_k
parents:
920
diff
changeset
|
179 "pfadd %%mm5, %%mm3\n\t"/*A13 = a1 + v*/\ |
920 | 180 "movq %%mm1, %1\n\t"\ |
181 "movq %%mm3, %3"\ | |
182 :"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\ | |
1068
cda8756ac10f
Minor improvements and gcc-3.0 (unstable) compilation fixes
nickols_k
parents:
1053
diff
changeset
|
183 :"m"(A1), "m"(A5)\ |
920 | 184 :"memory");\ |
886 | 185 } |
186 | |
885 | 187 #endif |