Mercurial > mplayer.hg
annotate libmpeg2/idct_alpha.c @ 29711:d2e01d720218
Remove many uses of tmp_run.
Checking that the installed header and library match is not really our task;
if desired, it would also be more correct to do it at runtime (e.g. because
of distributed binaries, or system updates gone wrong, ...).
tmp_run also slows down configure on systems with a slow fork, like MinGW.
author | reimar |
---|---|
date | Tue, 06 Oct 2009 06:43:00 +0000 |
parents | e83eef58b30a |
children |
rev | line source |
---|---|
9857 | 1 /* |
2 * idct_alpha.c | |
12932 | 3 * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org> |
4 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> | |
9857 | 5 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
6 * | |
7 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
8 * See http://libmpeg2.sourceforge.net/ for updates. | |
9 * | |
10 * mpeg2dec is free software; you can redistribute it and/or modify | |
11 * it under the terms of the GNU General Public License as published by | |
12 * the Free Software Foundation; either version 2 of the License, or | |
13 * (at your option) any later version. | |
14 * | |
15 * mpeg2dec is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 * GNU General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU General Public License | |
21 * along with this program; if not, write to the Free Software | |
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
23 */ | |
24 | |
25 #include "config.h" | |
26 | |
28290 | 27 #if ARCH_ALPHA |
9857 | 28 |
29 #include <stdlib.h> | |
30 #include <inttypes.h> | |
31 | |
12932 | 32 #include "mpeg2.h" |
33 #include "attributes.h" | |
34 #include "mpeg2_internal.h" | |
9857 | 35 #include "alpha_asm.h" |
36 | |
/*
 * Fixed-point rotation constants for the IDCT:
 *     Wk = 2048 * sqrt (2) * cos (k * pi / 16), rounded to the nearest integer.
 */
#define W1 2841         /* k = 1 */
#define W2 2676         /* k = 2 */
#define W3 2408         /* k = 3 */
#define W5 1609         /* k = 5 */
#define W6 1108         /* k = 6 */
#define W7 565          /* k = 7 */

/*
 * Lookup table defined elsewhere in the library.  CLIP (i) reads it with a
 * bias of 3840, so given the declared size the index may range from -3840
 * up to 3840 + 255.
 * NOTE(review): the table presumably saturates its index to 0..255 -- confirm
 * against the code that fills mpeg2_clip.
 */
extern uint8_t mpeg2_clip[3840 * 2 + 256];
#define CLIP(i) ((mpeg2_clip + 3840)[i])
9857 | 46 |
/*
 * Plane rotation ("butterfly") shared by the row and column passes:
 *
 *     out0 = c0 * in0 + c1 * in1
 *     out1 = c0 * in1 - c1 * in0
 *
 * It is evaluated with three multiplications instead of four by sharing
 * the product c0 * (in0 + in1):
 *
 *     out0 = c0 * (in0 + in1) + (c1 - c0) * in1
 *     out1 = c0 * (in0 + in1) - (c1 + c0) * in0
 *
 * Every argument is parenthesized in the expansion so that expressions may
 * be passed safely.  Arguments are still evaluated more than once, so they
 * must be free of side effects.  out0 and out1 must be assignable.
 */
#define BUTTERFLY(out0,out1,c0,c1,in0,in1)                      \
do {                                                            \
    int_fast32_t butterfly_tmp = (c0) * ((in0) + (in1));        \
    (out0) = butterfly_tmp + ((c1) - (c0)) * (in1);             \
    (out1) = butterfly_tmp - ((c1) + (c0)) * (in0);             \
} while (0)
61 | |
10392 | 62 static inline void idct_row (int16_t * const block) |
9857 | 63 { |
64 uint64_t l, r; | |
65 int_fast32_t d0, d1, d2, d3; | |
66 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; | |
67 int_fast32_t t0, t1, t2, t3; | |
68 | |
69 l = ldq (block); | |
70 r = ldq (block + 4); | |
71 | |
72 /* shortcut */ | |
73 if (likely (!((l & ~0xffffUL) | r))) { | |
12932 | 74 uint64_t tmp = (uint16_t) (l >> 1); |
9857 | 75 tmp |= tmp << 16; |
76 tmp |= tmp << 32; | |
77 ((int32_t *)block)[0] = tmp; | |
78 ((int32_t *)block)[1] = tmp; | |
79 ((int32_t *)block)[2] = tmp; | |
80 ((int32_t *)block)[3] = tmp; | |
81 return; | |
82 } | |
83 | |
12932 | 84 d0 = (sextw (l) << 11) + 2048; |
9857 | 85 d1 = sextw (extwl (l, 2)); |
86 d2 = sextw (extwl (l, 4)) << 11; | |
87 d3 = sextw (extwl (l, 6)); | |
88 t0 = d0 + d2; | |
89 t1 = d0 - d2; | |
90 BUTTERFLY (t2, t3, W6, W2, d3, d1); | |
91 a0 = t0 + t2; | |
92 a1 = t1 + t3; | |
93 a2 = t1 - t3; | |
94 a3 = t0 - t2; | |
95 | |
96 d0 = sextw (r); | |
97 d1 = sextw (extwl (r, 2)); | |
98 d2 = sextw (extwl (r, 4)); | |
99 d3 = sextw (extwl (r, 6)); | |
100 BUTTERFLY (t0, t1, W7, W1, d3, d0); | |
101 BUTTERFLY (t2, t3, W3, W5, d1, d2); | |
102 b0 = t0 + t2; | |
103 b3 = t1 + t3; | |
104 t0 -= t2; | |
105 t1 -= t3; | |
12932 | 106 b1 = ((t0 + t1) >> 8) * 181; |
107 b2 = ((t0 - t1) >> 8) * 181; | |
9857 | 108 |
12932 | 109 block[0] = (a0 + b0) >> 12; |
110 block[1] = (a1 + b1) >> 12; | |
111 block[2] = (a2 + b2) >> 12; | |
112 block[3] = (a3 + b3) >> 12; | |
113 block[4] = (a3 - b3) >> 12; | |
114 block[5] = (a2 - b2) >> 12; | |
115 block[6] = (a1 - b1) >> 12; | |
116 block[7] = (a0 - b0) >> 12; | |
9857 | 117 } |
118 | |
10392 | 119 static inline void idct_col (int16_t * const block) |
9857 | 120 { |
121 int_fast32_t d0, d1, d2, d3; | |
122 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; | |
123 int_fast32_t t0, t1, t2, t3; | |
124 | |
125 d0 = (block[8*0] << 11) + 65536; | |
126 d1 = block[8*1]; | |
127 d2 = block[8*2] << 11; | |
128 d3 = block[8*3]; | |
129 t0 = d0 + d2; | |
130 t1 = d0 - d2; | |
131 BUTTERFLY (t2, t3, W6, W2, d3, d1); | |
132 a0 = t0 + t2; | |
133 a1 = t1 + t3; | |
134 a2 = t1 - t3; | |
135 a3 = t0 - t2; | |
136 | |
137 d0 = block[8*4]; | |
138 d1 = block[8*5]; | |
139 d2 = block[8*6]; | |
140 d3 = block[8*7]; | |
141 BUTTERFLY (t0, t1, W7, W1, d3, d0); | |
142 BUTTERFLY (t2, t3, W3, W5, d1, d2); | |
143 b0 = t0 + t2; | |
144 b3 = t1 + t3; | |
12932 | 145 t0 -= t2; |
146 t1 -= t3; | |
147 b1 = ((t0 + t1) >> 8) * 181; | |
148 b2 = ((t0 - t1) >> 8) * 181; | |
9857 | 149 |
150 block[8*0] = (a0 + b0) >> 17; | |
151 block[8*1] = (a1 + b1) >> 17; | |
152 block[8*2] = (a2 + b2) >> 17; | |
153 block[8*3] = (a3 + b3) >> 17; | |
154 block[8*4] = (a3 - b3) >> 17; | |
155 block[8*5] = (a2 - b2) >> 17; | |
156 block[8*6] = (a1 - b1) >> 17; | |
157 block[8*7] = (a0 - b0) >> 17; | |
158 } | |
159 | |
/*
 * Inverse-transform an 8x8 coefficient block and write the result to dest
 * as 8 rows of 8 bytes, using the helpers from alpha_asm.h
 * (maxsw4/minsw4/pkwb).
 *
 * block  - 64 coefficients; transformed in place and then zeroed row by row
 * dest   - output pixels, overwritten
 * stride - byte distance between successive output rows
 */
void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
{
    uint64_t clampmask;
    int16_t * line;
    int n;

    for (line = block; line < block + 64; line += 8)
        idct_row (line);

    for (n = 0; n < 8; n++)
        idct_col (block + n);

    clampmask = zap (-1, 0xaa);         /* 0x00ff00ff00ff00ff */

    for (n = 8; n > 0; n--) {
        uint64_t left, right;

        /* left four samples: clamp each 16-bit lane to 0..255, pack, store */
        left = ldq (block);
        left = maxsw4 (left, 0);
        left = minsw4 (left, clampmask);
        stl (pkwb (left), dest);

        /* right four samples */
        right = ldq (block + 4);
        right = maxsw4 (right, 0);
        right = minsw4 (right, clampmask);
        stl (pkwb (right), dest + 4);

        /* leave the coefficient row cleared */
        stq (0, block);
        stq (0, block + 4);

        block += 8;
        dest += stride;
    }
}
192 | |
/*
 * Inverse-transform an 8x8 coefficient block and add the result to the
 * pixels already present in dest, saturating each pixel to 0..255.  Uses
 * the helpers from alpha_asm.h.
 *
 * last   - when equal to 129 and (block[0] & 0x70) != 0x40, only block[0]
 *          is used and a single rounded value is added to all 64 pixels
 *          NOTE(review): 129 presumably marks a DC-only block -- confirm
 *          against the caller.
 * block  - 64 coefficients; cleared on return (in the short path only
 *          block[0] and block[63] are written)
 * dest   - pixels to update
 * stride - byte distance between successive pixel rows
 */
void mpeg2_idct_add_mvi (const int last, int16_t * block,
                         uint8_t * dest, const int stride)
{
    uint64_t clampmask;
    uint64_t signmask;
    int n;

    if (last == 129 && (block[0] & (7 << 4)) != (4 << 4)) {
        /* short path: add one constant to every pixel */
        uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
        uint64_t dcvec;
        int dc;

        dc = (block[0] + 64) >> 7;
        block[0] = block[63] = 0;

        q0 = ldq (dest + 0 * stride);
        q1 = ldq (dest + 1 * stride);
        q2 = ldq (dest + 2 * stride);
        q3 = ldq (dest + 3 * stride);
        q4 = ldq (dest + 4 * stride);
        q5 = ldq (dest + 5 * stride);
        q6 = ldq (dest + 6 * stride);
        q7 = ldq (dest + 7 * stride);

        if (dc > 0) {
            /*
             * Saturating add: the increment is limited to ~pixel, i.e. the
             * headroom left below 255 (assuming minub8 is a per-byte
             * unsigned minimum).
             */
            dcvec = BYTE_VEC (likely (dc <= 255) ? dc : 255);
            q0 += minub8 (dcvec, ~q0);
            q1 += minub8 (dcvec, ~q1);
            q2 += minub8 (dcvec, ~q2);
            q3 += minub8 (dcvec, ~q3);
            q4 += minub8 (dcvec, ~q4);
            q5 += minub8 (dcvec, ~q5);
            q6 += minub8 (dcvec, ~q6);
            q7 += minub8 (dcvec, ~q7);
        } else {
            /* saturating subtract: the decrement is limited to the pixel */
            dcvec = BYTE_VEC (likely (-dc <= 255) ? -dc : 255);
            q0 -= minub8 (dcvec, q0);
            q1 -= minub8 (dcvec, q1);
            q2 -= minub8 (dcvec, q2);
            q3 -= minub8 (dcvec, q3);
            q4 -= minub8 (dcvec, q4);
            q5 -= minub8 (dcvec, q5);
            q6 -= minub8 (dcvec, q6);
            q7 -= minub8 (dcvec, q7);
        }

        stq (q0, dest + 0 * stride);
        stq (q1, dest + 1 * stride);
        stq (q2, dest + 2 * stride);
        stq (q3, dest + 3 * stride);
        stq (q4, dest + 4 * stride);
        stq (q5, dest + 5 * stride);
        stq (q6, dest + 6 * stride);
        stq (q7, dest + 7 * stride);
        return;
    }

    /* general path: full transform, then add to the existing pixels */
    for (n = 0; n < 8; n++)
        idct_row (block + 8 * n);
    for (n = 0; n < 8; n++)
        idct_col (block + n);

    clampmask = zap (-1, 0xaa);         /* 0x00ff00ff00ff00ff */
    signmask = zap (-1, 0x33);
    signmask ^= signmask >> 1;          /* 0x8000800080008000 */

    for (n = 8; n > 0; n--) {
        uint64_t left, leftpix, leftsign;
        uint64_t right, rightpix, rightsign;

        left = ldq (block);
        right = ldq (block + 4);

        /*
         * Signed per-lane 16-bit add (MMX paddw): strip the sign bits, add,
         * then put the sign bits back so no carry crosses a lane boundary.
         */
        leftpix = unpkbw (ldl (dest));
        leftsign = left & signmask;
        left &= ~signmask;
        left += leftpix;
        left ^= leftsign;
        /* clamp each lane to 0..255 */
        left = maxsw4 (left, 0);
        left = minsw4 (left, clampmask);

        /* same again for the right four samples */
        rightpix = unpkbw (ldl (dest + 4));
        rightsign = right & signmask;
        right &= ~signmask;
        right += rightpix;
        right ^= rightsign;
        right = maxsw4 (right, 0);
        right = minsw4 (right, clampmask);

        stl (pkwb (left), dest);
        stl (pkwb (right), dest + 4);
        stq (0, block);
        stq (0, block + 4);

        block += 8;
        dest += stride;
    }
}
292 | |
/*
 * Inverse-transform an 8x8 coefficient block and write the result to dest
 * as 8 rows of 8 bytes, mapping every sample through the CLIP table.
 *
 * block  - 64 coefficients; transformed in place and then zeroed row by row
 * dest   - output pixels, overwritten
 * stride - byte distance between successive output rows
 */
void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
{
    int row, col;

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col]);

        /* leave the coefficient row cleared */
        stq (0, block);
        stq (0, block + 4);

        block += 8;
        dest += stride;
    }
}
318 | |
/*
 * Inverse-transform an 8x8 coefficient block and add the result to the
 * pixels already present in dest, mapping every sum through the CLIP table.
 *
 * last   - when equal to 129 and (block[0] & 0x70) != 0x40, only block[0]
 *          is used and a single rounded value is added to all 64 pixels
 *          NOTE(review): 129 presumably marks a DC-only block -- confirm
 *          against the caller.
 * block  - 64 coefficients; cleared on return (in the short path only
 *          block[0] and block[63] are written)
 * dest   - pixels to update
 * stride - byte distance between successive pixel rows
 */
void mpeg2_idct_add_alpha (const int last, int16_t * block,
                           uint8_t * dest, const int stride)
{
    int row, col;

    if (last == 129 && (block[0] & (7 << 4)) != (4 << 4)) {
        /* short path: add one constant to every pixel */
        const int dc = (block[0] + 64) >> 7;

        block[0] = block[63] = 0;
        for (row = 0; row < 8; row++) {
            for (col = 0; col < 8; col++)
                dest[col] = CLIP (dc + dest[col]);
            dest += stride;
        }
        return;
    }

    /* general path: full transform, then add to the existing pixels */
    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col] + dest[col]);

        /* leave the coefficient row cleared */
        stq (0, block);
        stq (0, block + 4);

        block += 8;
        dest += stride;
    }
}
364 | |
12932 | 365 void mpeg2_idct_alpha_init (void) |
9857 | 366 { |
367 int i, j; | |
368 | |
369 for (i = 0; i < 64; i++) { | |
370 j = mpeg2_scan_norm[i]; | |
371 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); | |
372 j = mpeg2_scan_alt[i]; | |
373 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); | |
374 } | |
375 } | |
376 | |
377 #endif /* ARCH_ALPHA */ |