Mercurial > mplayer.hg
annotate libmpeg2/idct.c @ 10730:67449e5936f3
fix 10l (computation based on uninitialized data which led to
incorrect field matching) and greatly improve selection logic. the
pullup core should be very accurate now, so try throwing tough samples
at it and report any failures! :)
author | rfelker |
---|---|
date | Sun, 31 Aug 2003 17:46:32 +0000 |
parents | ec04f41e2480 |
children | d0a8810e155c |
rev | line source |
---|---|
1 | 1 /* |
2 * idct.c | |
9852 | 3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> |
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | |
1 | 5 * |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
9852 | 7 * See http://libmpeg2.sourceforge.net/ for updates. |
1 | 8 * |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 */ | |
23 | |
24 #include "config.h" | |
25 | |
9852 | 26 #include <stdlib.h> |
1 | 27 #include <inttypes.h> |
28 | |
9852 | 29 #include "mpeg2.h" |
1 | 30 #include "mpeg2_internal.h" |
9852 | 31 #include "attributes.h" |
1 | 32 |
/*
 * Fixed-point cosine table used by the butterflies below:
 * Wk = round (2048 * sqrt (2) * cos (k * pi / 16)).
 */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/*
 * idct main entry points.
 * Both pointers are assigned by mpeg2_idct_init () according to the
 * acceleration flags it is given.
 */
void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
void (* mpeg2_idct_add) (int last, int16_t * block,
                         uint8_t * dest, int stride);

/*
 * Saturation table: CLIP (i) yields i clamped to 0..255 for any i in
 * -384..639 (the table is biased by 384 so negative indices are valid).
 * The table is filled by the C fallback path of mpeg2_idct_init ().
 */
static uint8_t clip_lut[1024];
#define CLIP(i) ((clip_lut+384)[(i)])
1 | 47 |
/*
 * BUTTERFLY (t0, t1, c0, c1, d0, d1) computes the plane rotation
 *
 *     t0 = c0 * d0 + c1 * d1
 *     t1 = c0 * d1 - c1 * d0
 *
 * The disabled variant is the direct form (four multiplications); the
 * enabled one shares the product c0 * (d0 + d1) and needs only three.
 * All arithmetic is done in int.
 *
 * Every argument is parenthesised so that expressions can be passed
 * safely, and the scratch variable has a name that will not capture a
 * caller's own variable.  t0 is written before d0 is read for t1, so
 * t0 must not be the same object as d0.
 */
#if 0
#define BUTTERFLY(t0,t1,c0,c1,d0,d1)                    \
do {                                                    \
    (t0) = (c0) * (d0) + (c1) * (d1);                   \
    (t1) = (c0) * (d1) - (c1) * (d0);                   \
} while (0)
#else
#define BUTTERFLY(t0,t1,c0,c1,d0,d1)                    \
do {                                                    \
    int butterfly_tmp_ = (c0) * ((d0) + (d1));          \
    (t0) = butterfly_tmp_ + ((c1) - (c0)) * (d1);       \
    (t1) = butterfly_tmp_ - ((c1) + (c0)) * (d0);       \
} while (0)
#endif
1 | 62 |
10392 | 63 static inline void idct_row (int16_t * const block) |
1 | 64 { |
9852 | 65 int d0, d1, d2, d3; |
66 int a0, a1, a2, a3, b0, b1, b2, b3; | |
67 int t0, t1, t2, t3; | |
1 | 68 |
69 /* shortcut */ | |
9852 | 70 if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] | |
71 ((int32_t *)block)[3]))) { | |
72 uint32_t tmp = (uint16_t) (block[0] << 3); | |
73 tmp |= tmp << 16; | |
74 ((int32_t *)block)[0] = tmp; | |
75 ((int32_t *)block)[1] = tmp; | |
76 ((int32_t *)block)[2] = tmp; | |
77 ((int32_t *)block)[3] = tmp; | |
1 | 78 return; |
79 } | |
80 | |
9852 | 81 d0 = (block[0] << 11) + 128; |
82 d1 = block[1]; | |
83 d2 = block[2] << 11; | |
84 d3 = block[3]; | |
85 t0 = d0 + d2; | |
86 t1 = d0 - d2; | |
87 BUTTERFLY (t2, t3, W6, W2, d3, d1); | |
88 a0 = t0 + t2; | |
89 a1 = t1 + t3; | |
90 a2 = t1 - t3; | |
91 a3 = t0 - t2; | |
1 | 92 |
9852 | 93 d0 = block[4]; |
94 d1 = block[5]; | |
95 d2 = block[6]; | |
96 d3 = block[7]; | |
97 BUTTERFLY (t0, t1, W7, W1, d3, d0); | |
98 BUTTERFLY (t2, t3, W3, W5, d1, d2); | |
99 b0 = t0 + t2; | |
100 b3 = t1 + t3; | |
101 t0 -= t2; | |
102 t1 -= t3; | |
103 b1 = ((t0 + t1) * 181) >> 8; | |
104 b2 = ((t0 - t1) * 181) >> 8; | |
105 | |
106 block[0] = (a0 + b0) >> 8; | |
107 block[1] = (a1 + b1) >> 8; | |
108 block[2] = (a2 + b2) >> 8; | |
109 block[3] = (a3 + b3) >> 8; | |
110 block[4] = (a3 - b3) >> 8; | |
111 block[5] = (a2 - b2) >> 8; | |
112 block[6] = (a1 - b1) >> 8; | |
113 block[7] = (a0 - b0) >> 8; | |
1 | 114 } |
115 | |
10392 | 116 static inline void idct_col (int16_t * const block) |
1 | 117 { |
9852 | 118 int d0, d1, d2, d3; |
119 int a0, a1, a2, a3, b0, b1, b2, b3; | |
120 int t0, t1, t2, t3; | |
1 | 121 |
9852 | 122 d0 = (block[8*0] << 11) + 65536; |
123 d1 = block[8*1]; | |
124 d2 = block[8*2] << 11; | |
125 d3 = block[8*3]; | |
126 t0 = d0 + d2; | |
127 t1 = d0 - d2; | |
128 BUTTERFLY (t2, t3, W6, W2, d3, d1); | |
129 a0 = t0 + t2; | |
130 a1 = t1 + t3; | |
131 a2 = t1 - t3; | |
132 a3 = t0 - t2; | |
1 | 133 |
9852 | 134 d0 = block[8*4]; |
135 d1 = block[8*5]; | |
136 d2 = block[8*6]; | |
137 d3 = block[8*7]; | |
138 BUTTERFLY (t0, t1, W7, W1, d3, d0); | |
139 BUTTERFLY (t2, t3, W3, W5, d1, d2); | |
140 b0 = t0 + t2; | |
141 b3 = t1 + t3; | |
142 t0 = (t0 - t2) >> 8; | |
143 t1 = (t1 - t3) >> 8; | |
144 b1 = (t0 + t1) * 181; | |
145 b2 = (t0 - t1) * 181; | |
146 | |
147 block[8*0] = (a0 + b0) >> 17; | |
148 block[8*1] = (a1 + b1) >> 17; | |
149 block[8*2] = (a2 + b2) >> 17; | |
150 block[8*3] = (a3 + b3) >> 17; | |
151 block[8*4] = (a3 - b3) >> 17; | |
152 block[8*5] = (a2 - b2) >> 17; | |
153 block[8*6] = (a1 - b1) >> 17; | |
154 block[8*7] = (a0 - b0) >> 17; | |
1 | 155 } |
156 | |
/*
 * Portable C implementation behind mpeg2_idct_copy.
 *
 * Runs the row pass over the 8 rows and the column pass over the 8
 * columns of block, then writes the result, saturated through CLIP, to
 * an 8x8 area at dest whose rows are stride bytes apart.  Each row of
 * block is reset to zero once it has been written out, so the block is
 * all zero on return.
 *
 * NOTE(review): CLIP indexes a table that only covers -384..639; this
 * assumes the transform output stays inside that range -- confirm for
 * corrupt input.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
                               const int stride)
{
    int row, col;

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        /* emit the whole output row first ... */
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col]);
        /* ... then clear the coefficients it came from */
        for (col = 0; col < 8; col++)
            block[col] = 0;

        dest += stride;
        block += 8;
    }
}
183 | |
/*
 * Portable C implementation behind mpeg2_idct_add.
 *
 * Adds the inverse transform of block to the 8x8 area at dest (rows
 * stride bytes apart), saturating every sum through CLIP.
 *
 * Two paths:
 *  - last == 129 and the low three bits of block[0] are not 4: no
 *    transform is run; the single value (block[0] + 4) >> 3 is added to
 *    all 64 pixels and only block[0] and block[63] are cleared.
 *    (Presumably last == 129 is how the caller signals a DC-only
 *    block -- confirm against the caller.)
 *  - otherwise: full row and column passes, after which every row of
 *    block is reset to zero as it is consumed.
 *
 * NOTE(review): CLIP indexes a table that only covers -384..639; this
 * assumes every sum stays inside that range -- confirm for corrupt
 * input.
 */
static void mpeg2_idct_add_c (const int last, int16_t * block,
                              uint8_t * dest, const int stride)
{
    int row, col;

    if (last == 129 && (block[0] & 7) != 4) {
        /* constant offset for the whole 8x8 area */
        const int DC = (block[0] + 4) >> 3;

        block[0] = block[63] = 0;
        for (row = 0; row < 8; row++) {
            for (col = 0; col < 8; col++)
                dest[col] = CLIP (DC + dest[col]);
            dest += stride;
        }
        return;
    }

    for (row = 0; row < 8; row++)
        idct_row (block + 8 * row);
    for (col = 0; col < 8; col++)
        idct_col (block + col);

    for (row = 0; row < 8; row++) {
        /* accumulate the whole output row first ... */
        for (col = 0; col < 8; col++)
            dest[col] = CLIP (block[col] + dest[col]);
        /* ... then clear the coefficients it came from */
        for (col = 0; col < 8; col++)
            block[col] = 0;

        dest += stride;
        block += 8;
    }
}
1 | 229 |
9852 | 230 void mpeg2_idct_init (uint32_t accel) |
231 { | |
232 #ifdef ARCH_X86 | |
233 if (accel & MPEG2_ACCEL_X86_MMXEXT) { | |
234 mpeg2_idct_copy = mpeg2_idct_copy_mmxext; | |
235 mpeg2_idct_add = mpeg2_idct_add_mmxext; | |
236 mpeg2_idct_mmx_init (); | |
237 } else if (accel & MPEG2_ACCEL_X86_MMX) { | |
238 mpeg2_idct_copy = mpeg2_idct_copy_mmx; | |
239 mpeg2_idct_add = mpeg2_idct_add_mmx; | |
240 mpeg2_idct_mmx_init (); | |
241 } else | |
242 #endif | |
243 #ifdef ARCH_PPC | |
10269
217eb10b2f2d
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9852
diff
changeset
|
244 #ifdef HAVE_ALTIVEC |
9852 | 245 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { |
246 mpeg2_idct_copy = mpeg2_idct_copy_altivec; | |
247 mpeg2_idct_add = mpeg2_idct_add_altivec; | |
248 mpeg2_idct_altivec_init (); | |
249 } else | |
250 #endif | |
10269
217eb10b2f2d
libmpeg2-altivec patch by Magnus Damm <damm@opensource.se>:
arpi
parents:
9852
diff
changeset
|
251 #endif |
9852 | 252 #ifdef ARCH_ALPHA |
10488 | 253 #ifdef CAN_COMPILE_ALPHA_MVI |
9852 | 254 if (accel & MPEG2_ACCEL_ALPHA_MVI) { |
255 mpeg2_idct_copy = mpeg2_idct_copy_mvi; | |
256 mpeg2_idct_add = mpeg2_idct_add_mvi; | |
257 mpeg2_idct_alpha_init (0); | |
10488 | 258 } else |
259 #endif | |
260 if (accel & MPEG2_ACCEL_ALPHA) { | |
9852 | 261 mpeg2_idct_copy = mpeg2_idct_copy_alpha; |
262 mpeg2_idct_add = mpeg2_idct_add_alpha; | |
263 mpeg2_idct_alpha_init (1); | |
264 } else | |
265 #endif | |
266 #ifdef LIBMPEG2_MLIB | |
267 if (accel & MPEG2_ACCEL_MLIB) { | |
268 mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee; | |
269 mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ? | |
270 mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib); | |
271 } else | |
272 #endif | |
273 { | |
274 extern uint8_t mpeg2_scan_norm[64]; | |
275 extern uint8_t mpeg2_scan_alt[64]; | |
276 int i, j; | |
1 | 277 |
9852 | 278 mpeg2_idct_copy = mpeg2_idct_copy_c; |
279 mpeg2_idct_add = mpeg2_idct_add_c; | |
280 for (i = -384; i < 640; i++) | |
281 clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); | |
282 for (i = 0; i < 64; i++) { | |
283 j = mpeg2_scan_norm[i]; | |
284 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); | |
285 j = mpeg2_scan_alt[i]; | |
286 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); | |
287 } | |
288 } | |
1 | 289 } |