Mercurial > libavcodec.hg
annotate sparc/simple_idct_vis.c @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 766ca433df3b |
children |
rev | line source |
---|---|
5618 | 1 /* |
2 * SPARC VIS optimized inverse DCT | |
3 * Copyright (c) 2007 Denes Balatoni < dbalatoni XatX interware XdotX hu > | |
4 * | |
5 * I did consult the following fine web page about dct | |
6 * http://www.geocities.com/ssavekar/dct.htm | |
7 * | |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
6763 | 25 #include "libavcodec/dsputil.h" |
11457 | 26 #include "dsputil_vis.h" |
5618 | 27 |
11369 | 28 static const DECLARE_ALIGNED(8, int16_t, coeffs)[28] = { |
5618 | 29 - 1259,- 1259,- 1259,- 1259, |
30 - 4989,- 4989,- 4989,- 4989, | |
31 -11045,-11045,-11045,-11045, | |
32 -19195,-19195,-19195,-19195, | |
33 -29126,-29126,-29126,-29126, | |
34 25080, 25080, 25080, 25080, | |
35 12785, 12785, 12785, 12785 | |
36 }; | |
11369 | 37 static const DECLARE_ALIGNED(8, uint16_t, scale)[4] = { |
5618 | 38 65536>>6, 65536>>6, 65536>>6, 65536>>6 |
39 }; | |
11369 | 40 static const DECLARE_ALIGNED(8, uint16_t, rounder)[4] = { |
5618 | 41 1<<5, 1<<5, 1<<5, 1<<5 |
42 }; | |
11369 | 43 static const DECLARE_ALIGNED(8, uint16_t, expand)[4] = { |
5618 | 44 1<<14, 1<<14, 1<<14, 1<<14 |
45 }; | |
46 | |
/*
 * INIT_IDCT: register setup shared by the three IDCT entry points.
 *  - loads the seven doublewords at [%1] .. [%1+48] into %f32, %f34, ... %f44
 *    (the callers in this file bind asm operand %1 to the coeffs table);
 *  - loads the doubleword at [%0] into %f46 (callers bind %0 to either the
 *    scale table or the rounder table);
 *  - clears %f62 with fzero; IDCT4ROWS compares against it.
 */
47 #define INIT_IDCT \ | |
48 "ldd [%1], %%f32 \n\t"\ | |
49 "ldd [%1+8], %%f34 \n\t"\ | |
50 "ldd [%1+16], %%f36 \n\t"\ | |
51 "ldd [%1+24], %%f38 \n\t"\ | |
52 "ldd [%1+32], %%f40 \n\t"\ | |
53 "ldd [%1+40], %%f42 \n\t"\ | |
54 "ldd [%1+48], %%f44 \n\t"\ | |
55 "ldd [%0], %%f46 \n\t"\ | |
56 "fzero %%f62 \n\t"\ | |
57 | |
/*
 * LOADSCALE(in): load eight doublewords from `in`, `in`+16, ... `in`+112
 * (16-byte stride) into %f0, %f2, ... %f14, then add every register to itself
 * four times with fpadd16, i.e. multiply each 16-bit lane by 16.
 *
 * `in` is an address expression given as a string literal (e.g. "%2+8").
 * NOTE(review): the 16-byte stride presumably corresponds to one row of eight
 * 16-bit coefficients, so each call picks up a four-column half of the block
 * -- confirm against the definition of DCTELEM.
 */
58 #define LOADSCALE(in) \ | |
59 "ldd [" in "], %%f0 \n\t"\ | |
60 "ldd [" in "+16], %%f2 \n\t"\ | |
61 "ldd [" in "+32], %%f4 \n\t"\ | |
62 "ldd [" in "+48], %%f6 \n\t"\ | |
63 "ldd [" in "+64], %%f8 \n\t"\ | |
64 "ldd [" in "+80], %%f10 \n\t"\ | |
65 "ldd [" in "+96], %%f12 \n\t"\ | |
66 "ldd [" in "+112], %%f14 \n\t"\ | |
67 "fpadd16 %%f0, %%f0, %%f0 \n\t"\ | |
68 "fpadd16 %%f2, %%f2, %%f2 \n\t"\ | |
69 "fpadd16 %%f4, %%f4, %%f4 \n\t"\ | |
70 "fpadd16 %%f6, %%f6, %%f6 \n\t"\ | |
71 "fpadd16 %%f8, %%f8, %%f8 \n\t"\ | |
72 "fpadd16 %%f10, %%f10, %%f10 \n\t"\ | |
73 "fpadd16 %%f12, %%f12, %%f12 \n\t"\ | |
74 "fpadd16 %%f14, %%f14, %%f14 \n\t"\ | |
75 \ | |
76 "fpadd16 %%f0, %%f0, %%f0 \n\t"\ | |
77 "fpadd16 %%f2, %%f2, %%f2 \n\t"\ | |
78 "fpadd16 %%f4, %%f4, %%f4 \n\t"\ | |
79 "fpadd16 %%f6, %%f6, %%f6 \n\t"\ | |
80 "fpadd16 %%f8, %%f8, %%f8 \n\t"\ | |
81 "fpadd16 %%f10, %%f10, %%f10 \n\t"\ | |
82 "fpadd16 %%f12, %%f12, %%f12 \n\t"\ | |
83 "fpadd16 %%f14, %%f14, %%f14 \n\t"\ | |
84 \ | |
85 "fpadd16 %%f0, %%f0, %%f0 \n\t"\ | |
86 "fpadd16 %%f2, %%f2, %%f2 \n\t"\ | |
87 "fpadd16 %%f4, %%f4, %%f4 \n\t"\ | |
88 "fpadd16 %%f6, %%f6, %%f6 \n\t"\ | |
89 "fpadd16 %%f8, %%f8, %%f8 \n\t"\ | |
90 "fpadd16 %%f10, %%f10, %%f10 \n\t"\ | |
91 "fpadd16 %%f12, %%f12, %%f12 \n\t"\ | |
92 "fpadd16 %%f14, %%f14, %%f14 \n\t"\ | |
93 \ | |
94 "fpadd16 %%f0, %%f0, %%f0 \n\t"\ | |
95 "fpadd16 %%f2, %%f2, %%f2 \n\t"\ | |
96 "fpadd16 %%f4, %%f4, %%f4 \n\t"\ | |
97 "fpadd16 %%f6, %%f6, %%f6 \n\t"\ | |
98 "fpadd16 %%f8, %%f8, %%f8 \n\t"\ | |
99 "fpadd16 %%f10, %%f10, %%f10 \n\t"\ | |
100 "fpadd16 %%f12, %%f12, %%f12 \n\t"\ | |
101 "fpadd16 %%f14, %%f14, %%f14 \n\t"\ | |
102 | |
/*
 * LOAD(in): load eight consecutive doublewords (`in` .. `in`+56, 64 bytes in
 * total) into %f16, %f18, ... %f30 without any scaling.  These are the
 * registers TRANSPOSE reads.  `in` is an address expression given as a
 * string literal (e.g. "%2+64").
 */
103 #define LOAD(in) \ | |
104 "ldd [" in "], %%f16 \n\t"\ | |
105 "ldd [" in "+8], %%f18 \n\t"\ | |
106 "ldd [" in "+16], %%f20 \n\t"\ | |
107 "ldd [" in "+24], %%f22 \n\t"\ | |
108 "ldd [" in "+32], %%f24 \n\t"\ | |
109 "ldd [" in "+40], %%f26 \n\t"\ | |
110 "ldd [" in "+48], %%f28 \n\t"\ | |
111 "ldd [" in "+56], %%f30 \n\t"\ | |
112 | |
/*
 * TRANSPOSE: three stages of eight fpmerge instructions each.
 *   stage 1: %f16..%f31 (used as single-word halves) -> %f0..%f14
 *   stage 2: %f0..%f15                               -> %f16..%f30
 *   stage 3: %f16..%f31                              -> %f0..%f14
 * Input is the 64 bytes held in %f16..%f31; the result is left in %f0..%f14,
 * which is where IDCT4ROWS expects its input.  %f16..%f31 are overwritten.
 * NOTE(review): per the macro name the net effect is a transpose of the
 * 16-bit elements held in those registers -- confirm against the VIS
 * definition of fpmerge (byte interleave) before relying on the exact layout.
 */
113 #define TRANSPOSE \ | |
114 "fpmerge %%f16, %%f24, %%f0 \n\t"\ | |
115 "fpmerge %%f20, %%f28, %%f2 \n\t"\ | |
116 "fpmerge %%f17, %%f25, %%f4 \n\t"\ | |
117 "fpmerge %%f21, %%f29, %%f6 \n\t"\ | |
118 "fpmerge %%f18, %%f26, %%f8 \n\t"\ | |
119 "fpmerge %%f22, %%f30, %%f10 \n\t"\ | |
120 "fpmerge %%f19, %%f27, %%f12 \n\t"\ | |
121 "fpmerge %%f23, %%f31, %%f14 \n\t"\ | |
122 \ | |
123 "fpmerge %%f0, %%f2, %%f16 \n\t"\ | |
124 "fpmerge %%f1, %%f3, %%f18 \n\t"\ | |
125 "fpmerge %%f4, %%f6, %%f20 \n\t"\ | |
126 "fpmerge %%f5, %%f7, %%f22 \n\t"\ | |
127 "fpmerge %%f8, %%f10, %%f24 \n\t"\ | |
128 "fpmerge %%f9, %%f11, %%f26 \n\t"\ | |
129 "fpmerge %%f12, %%f14, %%f28 \n\t"\ | |
130 "fpmerge %%f13, %%f15, %%f30 \n\t"\ | |
131 \ | |
132 "fpmerge %%f16, %%f17, %%f0 \n\t"\ | |
133 "fpmerge %%f18, %%f19, %%f2 \n\t"\ | |
134 "fpmerge %%f20, %%f21, %%f4 \n\t"\ | |
135 "fpmerge %%f22, %%f23, %%f6 \n\t"\ | |
136 "fpmerge %%f24, %%f25, %%f8 \n\t"\ | |
137 "fpmerge %%f26, %%f27, %%f10 \n\t"\ | |
138 "fpmerge %%f28, %%f29, %%f12 \n\t"\ | |
139 "fpmerge %%f30, %%f31, %%f14 \n\t"\ | |
140 | |
/*
 * IDCT4ROWS: the 1-D transform core, applied to the eight doubleword
 * registers %f0..%f14 (four 16-bit lanes each, so four independent
 * transforms run in parallel).
 *
 * Register contract, as visible in the code below:
 *   inputs   %f0..%f14  data (from LOADSCALE or TRANSPOSE),
 *            %f32..%f44 coefficients and %f62 = 0 (both from INIT_IDCT),
 *            %f46       only through ADDROUNDER;
 *   outputs  %f16, %f20, %f24, %f28  sums of the final butterfly,
 *            %f48, %f50, %f52, %f54  differences of the final butterfly;
 *   scratch  %f18, %f22, %f26, %f30, %f48..%f60, %fcc0..%fcc2.
 *
 * Notes:
 *  - ADDROUNDER must be #defined (possibly empty) wherever this macro is
 *    expanded; it is spliced in after the first group of multiplies.
 *  - Every product is built from an fmul8ulx16 / fmul8sux16 pair whose two
 *    partial results are accumulated with fpadd16 / fpsub16.  For some
 *    terms the unmultiplied input is added or subtracted as well.
 *  - "1. column" uses %f0/%f2, "2. column" %f4/%f6, "3. column" %f8/%f10 and
 *    "4. column" %f12/%f14.
 *  - Each "for" + "fcmpd" pair ORs the two inputs of a later column and
 *    compares the result with %f62; the matching "fbe" then jumps over that
 *    column's section when the compare reports equality.
 *  - The instruction written directly after each "fbe" occupies the SPARC
 *    branch delay slot, so it executes whether or not the branch is taken.
 *  - 3:, 4: and 5: are numeric local labels, so the macro can be expanded
 *    several times inside one asm statement.
 *  - NOTE(review): fcmpd is a floating-point compare used here as an
 *    all-bits-zero test; a pattern with only the sign bit set (-0.0) would
 *    presumably compare equal as well -- confirm whether that input can occur.
 */
141 #define IDCT4ROWS \ | |
142 /* 1. column */\ | |
143 "fmul8ulx16 %%f0, %%f38, %%f28 \n\t"\ | |
144 "for %%f4, %%f6, %%f60 \n\t"\ | |
145 "fmul8ulx16 %%f2, %%f32, %%f18 \n\t"\ | |
146 "fmul8ulx16 %%f2, %%f36, %%f22 \n\t"\ | |
147 "fmul8ulx16 %%f2, %%f40, %%f26 \n\t"\ | |
148 "fmul8ulx16 %%f2, %%f44, %%f30 \n\t"\ | |
149 \ | |
150 ADDROUNDER\ | |
151 \ | |
152 "fmul8sux16 %%f0, %%f38, %%f48 \n\t"\ | |
153 "fcmpd %%fcc0, %%f62, %%f60 \n\t"\ | |
154 "for %%f8, %%f10, %%f60 \n\t"\ | |
155 "fmul8sux16 %%f2, %%f32, %%f50 \n\t"\ | |
156 "fmul8sux16 %%f2, %%f36, %%f52 \n\t"\ | |
157 "fmul8sux16 %%f2, %%f40, %%f54 \n\t"\ | |
158 "fmul8sux16 %%f2, %%f44, %%f56 \n\t"\ | |
159 \ | |
160 "fpadd16 %%f48, %%f28, %%f28 \n\t"\ | |
161 "fcmpd %%fcc1, %%f62, %%f60 \n\t"\ | |
162 "for %%f12, %%f14, %%f60 \n\t"\ | |
163 "fpadd16 %%f50, %%f18, %%f18 \n\t"\ | |
164 "fpadd16 %%f52, %%f22, %%f22 \n\t"\ | |
165 "fpadd16 %%f54, %%f26, %%f26 \n\t"\ | |
166 "fpadd16 %%f56, %%f30, %%f30 \n\t"\ | |
167 \ | |
168 "fpadd16 %%f28, %%f0, %%f16 \n\t"\ | |
169 "fcmpd %%fcc2, %%f62, %%f60 \n\t"\ | |
170 "fpadd16 %%f28, %%f0, %%f20 \n\t"\ | |
171 "fpadd16 %%f28, %%f0, %%f24 \n\t"\ | |
172 "fpadd16 %%f28, %%f0, %%f28 \n\t"\ | |
173 "fpadd16 %%f18, %%f2, %%f18 \n\t"\ | |
174 "fpadd16 %%f22, %%f2, %%f22 \n\t"\ | |
175 /* 2. column */\ | |
176 "fbe %%fcc0, 3f \n\t"\ | |
177 "fpadd16 %%f26, %%f2, %%f26 \n\t"\ | |
178 "fmul8ulx16 %%f4, %%f34, %%f48 \n\t"\ | |
179 "fmul8ulx16 %%f4, %%f42, %%f50 \n\t"\ | |
180 "fmul8ulx16 %%f6, %%f36, %%f52 \n\t"\ | |
181 "fmul8ulx16 %%f6, %%f44, %%f54 \n\t"\ | |
182 "fmul8ulx16 %%f6, %%f32, %%f56 \n\t"\ | |
183 "fmul8ulx16 %%f6, %%f40, %%f58 \n\t"\ | |
184 \ | |
185 "fpadd16 %%f16, %%f48, %%f16 \n\t"\ | |
186 "fpadd16 %%f20, %%f50, %%f20 \n\t"\ | |
187 "fpsub16 %%f24, %%f50, %%f24 \n\t"\ | |
188 "fpsub16 %%f28, %%f48, %%f28 \n\t"\ | |
189 "fpadd16 %%f18, %%f52, %%f18 \n\t"\ | |
190 "fpsub16 %%f22, %%f54, %%f22 \n\t"\ | |
191 "fpsub16 %%f26, %%f56, %%f26 \n\t"\ | |
192 "fpsub16 %%f30, %%f58, %%f30 \n\t"\ | |
193 \ | |
194 "fmul8sux16 %%f4, %%f34, %%f48 \n\t"\ | |
195 "fmul8sux16 %%f4, %%f42, %%f50 \n\t"\ | |
196 "fmul8sux16 %%f6, %%f36, %%f52 \n\t"\ | |
197 "fmul8sux16 %%f6, %%f44, %%f54 \n\t"\ | |
198 "fmul8sux16 %%f6, %%f32, %%f56 \n\t"\ | |
199 "fmul8sux16 %%f6, %%f40, %%f58 \n\t"\ | |
200 \ | |
201 "fpadd16 %%f16, %%f48, %%f16 \n\t"\ | |
202 "fpadd16 %%f20, %%f50, %%f20 \n\t"\ | |
203 "fpsub16 %%f24, %%f50, %%f24 \n\t"\ | |
204 "fpsub16 %%f28, %%f48, %%f28 \n\t"\ | |
205 "fpadd16 %%f18, %%f52, %%f18 \n\t"\ | |
206 "fpsub16 %%f22, %%f54, %%f22 \n\t"\ | |
207 "fpsub16 %%f26, %%f56, %%f26 \n\t"\ | |
208 "fpsub16 %%f30, %%f58, %%f30 \n\t"\ | |
209 \ | |
210 "fpadd16 %%f16, %%f4, %%f16 \n\t"\ | |
211 "fpsub16 %%f28, %%f4, %%f28 \n\t"\ | |
212 "fpadd16 %%f18, %%f6, %%f18 \n\t"\ | |
213 "fpsub16 %%f26, %%f6, %%f26 \n\t"\ | |
214 /* 3. column */\ | |
215 "3: \n\t"\ | |
216 "fbe %%fcc1, 4f \n\t"\ | |
217 "fpsub16 %%f30, %%f6, %%f30 \n\t"\ | |
218 "fmul8ulx16 %%f8, %%f38, %%f48 \n\t"\ | |
219 "fmul8ulx16 %%f10, %%f40, %%f50 \n\t"\ | |
220 "fmul8ulx16 %%f10, %%f32, %%f52 \n\t"\ | |
221 "fmul8ulx16 %%f10, %%f44, %%f54 \n\t"\ | |
222 "fmul8ulx16 %%f10, %%f36, %%f56 \n\t"\ | |
223 \ | |
224 "fpadd16 %%f16, %%f48, %%f16 \n\t"\ | |
225 "fpsub16 %%f20, %%f48, %%f20 \n\t"\ | |
226 "fpsub16 %%f24, %%f48, %%f24 \n\t"\ | |
227 "fpadd16 %%f28, %%f48, %%f28 \n\t"\ | |
228 "fpadd16 %%f18, %%f50, %%f18 \n\t"\ | |
229 "fpsub16 %%f22, %%f52, %%f22 \n\t"\ | |
230 "fpadd16 %%f26, %%f54, %%f26 \n\t"\ | |
231 "fpadd16 %%f30, %%f56, %%f30 \n\t"\ | |
232 \ | |
233 "fmul8sux16 %%f8, %%f38, %%f48 \n\t"\ | |
234 "fmul8sux16 %%f10, %%f40, %%f50 \n\t"\ | |
235 "fmul8sux16 %%f10, %%f32, %%f52 \n\t"\ | |
236 "fmul8sux16 %%f10, %%f44, %%f54 \n\t"\ | |
237 "fmul8sux16 %%f10, %%f36, %%f56 \n\t"\ | |
238 \ | |
239 "fpadd16 %%f16, %%f48, %%f16 \n\t"\ | |
240 "fpsub16 %%f20, %%f48, %%f20 \n\t"\ | |
241 "fpsub16 %%f24, %%f48, %%f24 \n\t"\ | |
242 "fpadd16 %%f28, %%f48, %%f28 \n\t"\ | |
243 "fpadd16 %%f18, %%f50, %%f18 \n\t"\ | |
244 "fpsub16 %%f22, %%f52, %%f22 \n\t"\ | |
245 "fpadd16 %%f26, %%f54, %%f26 \n\t"\ | |
246 "fpadd16 %%f30, %%f56, %%f30 \n\t"\ | |
247 \ | |
248 "fpadd16 %%f16, %%f8, %%f16 \n\t"\ | |
249 "fpsub16 %%f20, %%f8, %%f20 \n\t"\ | |
250 "fpsub16 %%f24, %%f8, %%f24 \n\t"\ | |
251 "fpadd16 %%f28, %%f8, %%f28 \n\t"\ | |
252 "fpadd16 %%f18, %%f10, %%f18 \n\t"\ | |
253 "fpsub16 %%f22, %%f10, %%f22 \n\t"\ | |
254 /* 4. column */\ | |
255 "4: \n\t"\ | |
256 "fbe %%fcc2, 5f \n\t"\ | |
257 "fpadd16 %%f30, %%f10, %%f30 \n\t"\ | |
258 "fmul8ulx16 %%f12, %%f42, %%f48 \n\t"\ | |
259 "fmul8ulx16 %%f12, %%f34, %%f50 \n\t"\ | |
260 "fmul8ulx16 %%f14, %%f44, %%f52 \n\t"\ | |
261 "fmul8ulx16 %%f14, %%f40, %%f54 \n\t"\ | |
262 "fmul8ulx16 %%f14, %%f36, %%f56 \n\t"\ | |
263 "fmul8ulx16 %%f14, %%f32, %%f58 \n\t"\ | |
264 \ | |
265 "fpadd16 %%f16, %%f48, %%f16 \n\t"\ | |
266 "fpsub16 %%f20, %%f50, %%f20 \n\t"\ | |
267 "fpadd16 %%f24, %%f50, %%f24 \n\t"\ | |
268 "fpsub16 %%f28, %%f48, %%f28 \n\t"\ | |
269 "fpadd16 %%f18, %%f52, %%f18 \n\t"\ | |
270 "fpsub16 %%f22, %%f54, %%f22 \n\t"\ | |
271 "fpadd16 %%f26, %%f56, %%f26 \n\t"\ | |
272 "fpsub16 %%f30, %%f58, %%f30 \n\t"\ | |
273 \ | |
274 "fmul8sux16 %%f12, %%f42, %%f48 \n\t"\ | |
275 "fmul8sux16 %%f12, %%f34, %%f50 \n\t"\ | |
276 "fmul8sux16 %%f14, %%f44, %%f52 \n\t"\ | |
277 "fmul8sux16 %%f14, %%f40, %%f54 \n\t"\ | |
278 "fmul8sux16 %%f14, %%f36, %%f56 \n\t"\ | |
279 "fmul8sux16 %%f14, %%f32, %%f58 \n\t"\ | |
280 \ | |
281 "fpadd16 %%f16, %%f48, %%f16 \n\t"\ | |
282 "fpsub16 %%f20, %%f50, %%f20 \n\t"\ | |
283 "fpadd16 %%f24, %%f50, %%f24 \n\t"\ | |
284 "fpsub16 %%f28, %%f48, %%f28 \n\t"\ | |
285 "fpadd16 %%f18, %%f52, %%f18 \n\t"\ | |
286 "fpsub16 %%f22, %%f54, %%f22 \n\t"\ | |
287 "fpadd16 %%f26, %%f56, %%f26 \n\t"\ | |
288 "fpsub16 %%f30, %%f58, %%f30 \n\t"\ | |
289 \ | |
290 "fpsub16 %%f20, %%f12, %%f20 \n\t"\ | |
291 "fpadd16 %%f24, %%f12, %%f24 \n\t"\ | |
292 "fpsub16 %%f22, %%f14, %%f22 \n\t"\ | |
293 "fpadd16 %%f26, %%f14, %%f26 \n\t"\ | |
294 "fpsub16 %%f30, %%f14, %%f30 \n\t"\ | |
295 /* final butterfly */\ | |
296 "5: \n\t"\ | |
297 "fpsub16 %%f16, %%f18, %%f48 \n\t"\ | |
298 "fpsub16 %%f20, %%f22, %%f50 \n\t"\ | |
299 "fpsub16 %%f24, %%f26, %%f52 \n\t"\ | |
300 "fpsub16 %%f28, %%f30, %%f54 \n\t"\ | |
301 "fpadd16 %%f16, %%f18, %%f16 \n\t"\ | |
302 "fpadd16 %%f20, %%f22, %%f20 \n\t"\ | |
303 "fpadd16 %%f24, %%f26, %%f24 \n\t"\ | |
304 "fpadd16 %%f28, %%f30, %%f28 \n\t"\ | |
305 | |
/*
 * STOREROWS(out): write the eight IDCT4ROWS result registers to memory with a
 * 16-byte stride starting at `out`:
 *   %f16 -> out+0,   %f20 -> out+16,  %f24 -> out+32,  %f28 -> out+48,
 *   %f54 -> out+64,  %f52 -> out+80,  %f50 -> out+96,  %f48 -> out+112.
 * The difference registers are stored in reverse order (%f48 goes last).
 * `out` is an address expression given as a string literal (e.g. "%3+8").
 */
306 #define STOREROWS(out) \ | |
307 "std %%f48, [" out "+112] \n\t"\ | |
308 "std %%f50, [" out "+96] \n\t"\ | |
309 "std %%f52, [" out "+80] \n\t"\ | |
310 "std %%f54, [" out "+64] \n\t"\ | |
311 "std %%f16, [" out "] \n\t"\ | |
312 "std %%f20, [" out "+16] \n\t"\ | |
313 "std %%f24, [" out "+32] \n\t"\ | |
314 "std %%f28, [" out "+48] \n\t"\ | |
315 | |
/*
 * SCALEROWS: multiply each of the eight IDCT4ROWS result registers
 * (%f48, %f50, %f52, %f54, %f16, %f20, %f24, %f28) in place by %f46 using
 * fmul8sux16.  Only ff_simple_idct_vis expands this macro, and there %f46
 * holds the scale table (loaded by INIT_IDCT from asm operand 0).
 * NOTE(review): with scale = 65536>>6 this presumably implements the final
 * right shift of the second pass -- confirm against the VIS definition of
 * fmul8sux16.
 */
316 #define SCALEROWS \ | |
317 "fmul8sux16 %%f46, %%f48, %%f48 \n\t"\ | |
318 "fmul8sux16 %%f46, %%f50, %%f50 \n\t"\ | |
319 "fmul8sux16 %%f46, %%f52, %%f52 \n\t"\ | |
320 "fmul8sux16 %%f46, %%f54, %%f54 \n\t"\ | |
321 "fmul8sux16 %%f46, %%f16, %%f16 \n\t"\ | |
322 "fmul8sux16 %%f46, %%f20, %%f20 \n\t"\ | |
323 "fmul8sux16 %%f46, %%f24, %%f24 \n\t"\ | |
324 "fmul8sux16 %%f46, %%f28, %%f28 \n\t"\ | |
325 | |
/*
 * PUTPIXELSCLAMPED(dest): convert the eight IDCT4ROWS result registers to
 * bytes with fpack16 and store one 32-bit word (four pixels) per row.
 *
 * Row mapping (result register -> packed register -> address):
 *   %f16 -> %f0  -> [%3+dest]     %f54 -> %f8  -> [%8+dest]
 *   %f20 -> %f2  -> [%5+dest]     %f52 -> %f10 -> [%9+dest]
 *   %f24 -> %f4  -> [%6+dest]     %f50 -> %f12 -> [%10+dest]
 *   %f28 -> %f6  -> [%7+dest]     %f48 -> %f14 -> [%11+dest]
 * In ff_simple_idct_put_vis, %3 is `dest` and %5..%11 are the addresses of
 * the following seven lines (each one line_size further).  `dest` here is a
 * byte offset given as a string literal ("0" or "4").
 * fpack16 depends on the %gsr value written at the start of the caller.
 * NOTE(review): the clamping to the pixel range implied by the macro name is
 * presumably the saturation performed by fpack16 -- confirm in the VIS manual.
 */
326 #define PUTPIXELSCLAMPED(dest) \ | |
327 "fpack16 %%f48, %%f14 \n\t"\ | |
328 "fpack16 %%f50, %%f12 \n\t"\ | |
329 "fpack16 %%f16, %%f0 \n\t"\ | |
330 "fpack16 %%f20, %%f2 \n\t"\ | |
331 "fpack16 %%f24, %%f4 \n\t"\ | |
332 "fpack16 %%f28, %%f6 \n\t"\ | |
333 "fpack16 %%f54, %%f8 \n\t"\ | |
334 "fpack16 %%f52, %%f10 \n\t"\ | |
335 "st %%f0, [%3+" dest "] \n\t"\ | |
336 "st %%f2, [%5+" dest "] \n\t"\ | |
337 "st %%f4, [%6+" dest "] \n\t"\ | |
338 "st %%f6, [%7+" dest "] \n\t"\ | |
339 "st %%f8, [%8+" dest "] \n\t"\ | |
340 "st %%f10, [%9+" dest "] \n\t"\ | |
341 "st %%f12, [%10+" dest "] \n\t"\ | |
342 "st %%f14, [%11+" dest "] \n\t"\ | |
343 | |
/*
 * ADDPIXELSCLAMPED(dest): add the eight IDCT4ROWS result registers to the
 * pixels already in the destination and store the packed sums back.
 *
 * Steps, in order:
 *   1. ldd [%5] loads a multiplier into %f18 (ff_simple_idct_add_vis binds
 *      operand %5 to the expand table);
 *   2. one 32-bit word (four pixels) is loaded from each of the eight row
 *      addresses %3, %6, %7, ... %12, all at byte offset `dest`;
 *   3. fmul8x16 by %f18 widens each word into four 16-bit lanes;
 *   4. fpadd16 adds the results: %f16, %f20, %f24, %f28 to rows 0-3 and
 *      %f54, %f52, %f50, %f48 to rows 4-7;
 *   5. fpack16 converts back to bytes and st writes each word to the address
 *      it was loaded from.
 * `dest` is a byte offset given as a string literal ("0" or "4").
 * Overwrites %f0..%f14 and %f18.
 * fpack16 depends on the %gsr value written at the start of the caller.
 * NOTE(review): the clamping implied by the macro name is presumably the
 * saturation performed by fpack16 -- confirm in the VIS manual.
 */
344 #define ADDPIXELSCLAMPED(dest) \ | |
345 "ldd [%5], %%f18 \n\t"\ | |
346 "ld [%3+" dest"], %%f0 \n\t"\ | |
347 "ld [%6+" dest"], %%f2 \n\t"\ | |
348 "ld [%7+" dest"], %%f4 \n\t"\ | |
349 "ld [%8+" dest"], %%f6 \n\t"\ | |
350 "ld [%9+" dest"], %%f8 \n\t"\ | |
351 "ld [%10+" dest"], %%f10 \n\t"\ | |
352 "ld [%11+" dest"], %%f12 \n\t"\ | |
353 "ld [%12+" dest"], %%f14 \n\t"\ | |
354 "fmul8x16 %%f0, %%f18, %%f0 \n\t"\ | |
355 "fmul8x16 %%f2, %%f18, %%f2 \n\t"\ | |
356 "fmul8x16 %%f4, %%f18, %%f4 \n\t"\ | |
357 "fmul8x16 %%f6, %%f18, %%f6 \n\t"\ | |
358 "fmul8x16 %%f8, %%f18, %%f8 \n\t"\ | |
359 "fmul8x16 %%f10, %%f18, %%f10 \n\t"\ | |
360 "fmul8x16 %%f12, %%f18, %%f12 \n\t"\ | |
361 "fmul8x16 %%f14, %%f18, %%f14 \n\t"\ | |
362 "fpadd16 %%f0, %%f16, %%f0 \n\t"\ | |
363 "fpadd16 %%f2, %%f20, %%f2 \n\t"\ | |
364 "fpadd16 %%f4, %%f24, %%f4 \n\t"\ | |
365 "fpadd16 %%f6, %%f28, %%f6 \n\t"\ | |
366 "fpadd16 %%f8, %%f54, %%f8 \n\t"\ | |
367 "fpadd16 %%f10, %%f52, %%f10 \n\t"\ | |
368 "fpadd16 %%f12, %%f50, %%f12 \n\t"\ | |
369 "fpadd16 %%f14, %%f48, %%f14 \n\t"\ | |
370 "fpack16 %%f0, %%f0 \n\t"\ | |
371 "fpack16 %%f2, %%f2 \n\t"\ | |
372 "fpack16 %%f4, %%f4 \n\t"\ | |
373 "fpack16 %%f6, %%f6 \n\t"\ | |
374 "fpack16 %%f8, %%f8 \n\t"\ | |
375 "fpack16 %%f10, %%f10 \n\t"\ | |
376 "fpack16 %%f12, %%f12 \n\t"\ | |
377 "fpack16 %%f14, %%f14 \n\t"\ | |
378 "st %%f0, [%3+" dest "] \n\t"\ | |
379 "st %%f2, [%6+" dest "] \n\t"\ | |
380 "st %%f4, [%7+" dest "] \n\t"\ | |
381 "st %%f6, [%8+" dest "] \n\t"\ | |
382 "st %%f8, [%9+" dest "] \n\t"\ | |
383 "st %%f10, [%10+" dest "] \n\t"\ | |
384 "st %%f12, [%11+" dest "] \n\t"\ | |
385 "st %%f14, [%12+" dest "] \n\t"\ | |
386 | |
387 | |
/* Annotate note for the following line: rev 8285, changeset 197fe6f703a3 (diego; parent 8031): "Remove useless inline qualifier, fixes linking with gcc 4.3." */
388 void ff_simple_idct_vis(DCTELEM *data) { |
5618 | 389 int out1, out2, out3, out4; |
11369 | 390 DECLARE_ALIGNED(8, int16_t, temp)[8*8]; |
5618 | 391 |
8031 | 392 __asm__ volatile( |
5618 | 393 INIT_IDCT |
394 | |
395 #define ADDROUNDER | |
396 | |
397 // shift right 16-4=12 | |
398 LOADSCALE("%2+8") | |
399 IDCT4ROWS | |
400 STOREROWS("%3+8") | |
401 LOADSCALE("%2+0") | |
402 IDCT4ROWS | |
403 "std %%f48, [%3+112] \n\t" | |
404 "std %%f50, [%3+96] \n\t" | |
405 "std %%f52, [%3+80] \n\t" | |
406 "std %%f54, [%3+64] \n\t" | |
407 | |
408 // shift right 16+4 | |
409 "ldd [%3+8], %%f18 \n\t" | |
410 "ldd [%3+24], %%f22 \n\t" | |
411 "ldd [%3+40], %%f26 \n\t" | |
412 "ldd [%3+56], %%f30 \n\t" | |
413 TRANSPOSE | |
414 IDCT4ROWS | |
415 SCALEROWS | |
416 STOREROWS("%2+0") | |
417 LOAD("%3+64") | |
418 TRANSPOSE | |
419 IDCT4ROWS | |
420 SCALEROWS | |
421 STOREROWS("%2+8") | |
422 | |
423 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4) | |
424 : "0" (scale), "1" (coeffs), "2" (data), "3" (temp) | |
425 ); | |
426 } | |
427 | |
428 void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) { | |
429 int out1, out2, out3, out4, out5; | |
430 int r1, r2, r3, r4, r5, r6, r7; | |
431 | |
8031 | 432 __asm__ volatile( |
5618 | 433 "wr %%g0, 0x8, %%gsr \n\t" |
434 | |
435 INIT_IDCT | |
436 | |
437 "add %3, %4, %5 \n\t" | |
438 "add %5, %4, %6 \n\t" | |
439 "add %6, %4, %7 \n\t" | |
440 "add %7, %4, %8 \n\t" | |
441 "add %8, %4, %9 \n\t" | |
442 "add %9, %4, %10 \n\t" | |
443 "add %10, %4, %11 \n\t" | |
444 | |
445 // shift right 16-4=12 | |
446 LOADSCALE("%2+8") | |
447 IDCT4ROWS | |
448 STOREROWS("%2+8") | |
449 LOADSCALE("%2+0") | |
450 IDCT4ROWS | |
451 "std %%f48, [%2+112] \n\t" | |
452 "std %%f50, [%2+96] \n\t" | |
453 "std %%f52, [%2+80] \n\t" | |
454 "std %%f54, [%2+64] \n\t" | |
455 | |
456 #undef ADDROUNDER | |
457 #define ADDROUNDER "fpadd16 %%f28, %%f46, %%f28 \n\t" | |
458 | |
459 // shift right 16+4 | |
460 "ldd [%2+8], %%f18 \n\t" | |
461 "ldd [%2+24], %%f22 \n\t" | |
462 "ldd [%2+40], %%f26 \n\t" | |
463 "ldd [%2+56], %%f30 \n\t" | |
464 TRANSPOSE | |
465 IDCT4ROWS | |
466 PUTPIXELSCLAMPED("0") | |
467 LOAD("%2+64") | |
468 TRANSPOSE | |
469 IDCT4ROWS | |
470 PUTPIXELSCLAMPED("4") | |
471 | |
472 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), | |
473 "=r" (r1), "=r" (r2), "=r" (r3), "=r" (r4), "=r" (r5), "=r" (r6), "=r" (r7) | |
474 : "0" (rounder), "1" (coeffs), "2" (data), "3" (dest), "4" (line_size) | |
475 ); | |
476 } | |
477 | |
478 void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data) { | |
479 int out1, out2, out3, out4, out5, out6; | |
480 int r1, r2, r3, r4, r5, r6, r7; | |
481 | |
8031 | 482 __asm__ volatile( |
5618 | 483 "wr %%g0, 0x8, %%gsr \n\t" |
484 | |
485 INIT_IDCT | |
486 | |
487 "add %3, %4, %6 \n\t" | |
488 "add %6, %4, %7 \n\t" | |
489 "add %7, %4, %8 \n\t" | |
490 "add %8, %4, %9 \n\t" | |
491 "add %9, %4, %10 \n\t" | |
492 "add %10, %4, %11 \n\t" | |
493 "add %11, %4, %12 \n\t" | |
494 | |
495 #undef ADDROUNDER | |
496 #define ADDROUNDER | |
497 | |
498 // shift right 16-4=12 | |
499 LOADSCALE("%2+8") | |
500 IDCT4ROWS | |
501 STOREROWS("%2+8") | |
502 LOADSCALE("%2+0") | |
503 IDCT4ROWS | |
504 "std %%f48, [%2+112] \n\t" | |
505 "std %%f50, [%2+96] \n\t" | |
506 "std %%f52, [%2+80] \n\t" | |
507 "std %%f54, [%2+64] \n\t" | |
508 | |
509 #undef ADDROUNDER | |
510 #define ADDROUNDER "fpadd16 %%f28, %%f46, %%f28 \n\t" | |
511 | |
512 // shift right 16+4 | |
513 "ldd [%2+8], %%f18 \n\t" | |
514 "ldd [%2+24], %%f22 \n\t" | |
515 "ldd [%2+40], %%f26 \n\t" | |
516 "ldd [%2+56], %%f30 \n\t" | |
517 TRANSPOSE | |
518 IDCT4ROWS | |
519 ADDPIXELSCLAMPED("0") | |
520 LOAD("%2+64") | |
521 TRANSPOSE | |
522 IDCT4ROWS | |
523 ADDPIXELSCLAMPED("4") | |
524 | |
525 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6), | |
526 "=r" (r1), "=r" (r2), "=r" (r3), "=r" (r4), "=r" (r5), "=r" (r6), "=r" (r7) | |
527 : "0" (rounder), "1" (coeffs), "2" (data), "3" (dest), "4" (line_size), "5" (expand) | |
528 ); | |
529 } |