Mercurial > libavcodec.hg
annotate jrevdct.c @ 2497:69adfbbdcdeb libavcodec
- samples from mplayer ftp in the "adv" profile seem to have profile=2,
which isn't the advanced one; and indeed, using adv. profile parser fails.
Using normal parser works, and that's what is done
- attempt at taking care of stride for NORM2 bitplane decoding
- duplication of much code from msmpeg4.c; this code isn't yet used, but
goes down as far as the block layer (mainly Transform Type stuff, the
remains are wild editing without checking). Unusable yet, and lacks the AC
decoding (but a step further in bitstream parsing)
patch by anonymous
author | michael |
---|---|
date | Fri, 04 Feb 2005 02:20:38 +0000 |
parents | 258f21820108 |
children | ef2149182f1c |
rev | line source |
---|---|
0 | 1 /* |
2 * jrevdct.c | |
3 * | |
4 * Copyright (C) 1991, 1992, Thomas G. Lane. | |
5 * This file is part of the Independent JPEG Group's software. | |
6 * For conditions of distribution and use, see the accompanying README file. | |
7 * | |
8 * This file contains the basic inverse-DCT transformation subroutine. | |
9 * | |
10 * This implementation is based on an algorithm described in | |
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
14 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
15 * We use their alternate method with 12 multiplies and 32 adds. | |
16 * The advantage of this method is that no data path contains more than one | |
17 * multiplication; this allows a very simple and accurate implementation in | |
18 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
19 * | |
20 * I've made lots of modifications to attempt to take advantage of the | |
21 * sparse nature of the DCT matrices we're getting. Although the logic | |
22 * is cumbersome, it's straightforward and the resulting code is much | |
23 * faster. | |
24 * | |
25 * A better way to do this would be to pass in the DCT block as a sparse | |
26 * matrix, perhaps with the different cases encoded. | |
27 */ | |
1106 | 28 |
29 /** | |
30 * @file jrevdct.c | |
31 * Independent JPEG Group's LLM idct. | |
32 */ | |
33 | |
0 | 34 #include "common.h" |
35 #include "dsputil.h" | |
36 | |
37 #define EIGHT_BIT_SAMPLES | |
38 | |
39 #define DCTSIZE 8 | |
40 #define DCTSIZE2 64 | |
41 | |
42 #define GLOBAL | |
43 | |
44 #define RIGHT_SHIFT(x, n) ((x) >> (n)) | |
45 | |
46 typedef DCTELEM DCTBLOCK[DCTSIZE2]; | |
47 | |
48 #define CONST_BITS 13 | |
49 | |
50 /* | |
51 * This routine is specialized to the case DCTSIZE = 8. | |
52 */ | |
53 | |
54 #if DCTSIZE != 8 | |
55 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | |
56 #endif | |
57 | |
58 | |
59 /* | |
60 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT | |
61 * on each column. Direct algorithms are also available, but they are | |
62 * much more complex and seem not to be any faster when reduced to code. | |
63 * | |
64 * The poop on this scaling stuff is as follows: | |
65 * | |
66 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
67 * larger than the true IDCT outputs. The final outputs are therefore | |
68 * a factor of N larger than desired; since N=8 this can be cured by | |
69 * a simple right shift at the end of the algorithm. The advantage of | |
70 * this arrangement is that we save two multiplications per 1-D IDCT, | |
71 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
72 * | |
73 * We have to do addition and subtraction of the integer inputs, which | |
74 * is no problem, and multiplication by fractional constants, which is | |
75 * a problem to do in integer arithmetic. We multiply all the constants | |
76 * by CONST_SCALE and convert them to integer constants (thus retaining | |
77 * CONST_BITS bits of precision in the constants). After doing a | |
78 * multiplication we have to divide the product by CONST_SCALE, with proper | |
79 * rounding, to produce the correct output. This division can be done | |
80 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
81 * as long as possible so that partial sums can be added together with | |
82 * full fractional precision. | |
83 * | |
84 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
85 * they are represented to better-than-integral precision. These outputs | |
86 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
87 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
88 * intermediate int32 array would be needed.) | |
89 * | |
90 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
91 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
92 * shows that the values given below are the most effective. | |
93 */ | |
94 | |
95 #ifdef EIGHT_BIT_SAMPLES | |
96 #define PASS1_BITS 2 | |
97 #else | |
98 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |
99 #endif | |
100 | |
1064 | 101 #define ONE ((int32_t) 1) |
0 | 102 |
103 #define CONST_SCALE (ONE << CONST_BITS) | |
104 | |
105 /* Convert a positive real constant to an integer scaled by CONST_SCALE. | |
106 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, | |
107 * you will pay a significant penalty in run time. In that case, figure | |
108 * the correct integer constant values and insert them by hand. | |
109 */ | |
110 | |
111 /* FIX() itself is no longer used: the FIX_* constants defined further down are the precomputed values of FIX(x) for CONST_BITS = 13. */ | |
1064 | 112 #define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) |
0 | 113 |
1064 | 114 /* Descale and correctly round an int32_t value that's scaled by N bits. |
0 | 115 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding |
116 * the fudge factor is correct for either sign of X. | |
117 */ | |
118 | |
119 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) | |
120 | |
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
 * this provides a useful speedup on many machines.
 * There is no way to specify a 16x16->32 multiply in portable C, but
 * some C compilers will do the right thing if you provide the correct
 * combination of casts.
 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The two narrow variants are mutually exclusive: if both selector macros
 * are defined, SHORTxSHORT_32 takes precedence (previously this produced
 * two conflicting definitions of MULTIPLY).
 */

#ifdef EIGHT_BIT_SAMPLES
#if defined(SHORTxSHORT_32)      /* may work if 'int' is 32 bits */
#define MULTIPLY(var,cnst)  (((int16_t) (var)) * ((int16_t) (cnst)))
#elif defined(SHORTxLCONST_32)   /* known to work with Microsoft C 6.0 */
#define MULTIPLY(var,cnst)  (((int16_t) (var)) * ((int32_t) (cnst)))
#endif
#endif

#ifndef MULTIPLY                 /* default definition */
#define MULTIPLY(var,cnst)  ((var) * (cnst))
#endif
144 | |
145 | |
146 /* | |
147 Unlike our decoder where we approximate the FIXes, we need to use exact | |
148 ones here or successive P-frames will drift too much with Reference frame coding | |
149 */ | |
150 #define FIX_0_211164243 1730 | |
151 #define FIX_0_275899380 2260 | |
152 #define FIX_0_298631336 2446 | |
153 #define FIX_0_390180644 3196 | |
154 #define FIX_0_509795579 4176 | |
155 #define FIX_0_541196100 4433 | |
156 #define FIX_0_601344887 4926 | |
157 #define FIX_0_765366865 6270 | |
158 #define FIX_0_785694958 6436 | |
159 #define FIX_0_899976223 7373 | |
160 #define FIX_1_061594337 8697 | |
161 #define FIX_1_111140466 9102 | |
162 #define FIX_1_175875602 9633 | |
163 #define FIX_1_306562965 10703 | |
164 #define FIX_1_387039845 11363 | |
165 #define FIX_1_451774981 11893 | |
166 #define FIX_1_501321110 12299 | |
167 #define FIX_1_662939225 13623 | |
168 #define FIX_1_847759065 15137 | |
169 #define FIX_1_961570560 16069 | |
170 #define FIX_2_053119869 16819 | |
171 #define FIX_2_172734803 17799 | |
172 #define FIX_2_562915447 20995 | |
173 #define FIX_3_072711026 25172 | |
174 | |
175 /* | |
176 * Perform the inverse DCT on one block of coefficients. | |
177 */ | |
178 | |
179 void j_rev_dct(DCTBLOCK data) | |
180 { | |
1064 | 181 int32_t tmp0, tmp1, tmp2, tmp3; |
182 int32_t tmp10, tmp11, tmp12, tmp13; | |
183 int32_t z1, z2, z3, z4, z5; | |
184 int32_t d0, d1, d2, d3, d4, d5, d6, d7; | |
0 | 185 register DCTELEM *dataptr; |
186 int rowctr; | |
187 | |
188 /* Pass 1: process rows. */ | |
189 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
190 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
191 | |
192 dataptr = data; | |
193 | |
194 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
195 /* Due to quantization, we will usually find that many of the input | |
196 * coefficients are zero, especially the AC terms. We can exploit this | |
197 * by short-circuiting the IDCT calculation for any row in which all | |
198 * the AC terms are zero. In that case each output is equal to the | |
199 * DC coefficient (with scale factor as needed). | |
200 * With typical images and quantization tables, half or more of the | |
201 * row DCT calculations can be simplified this way. | |
202 */ | |
203 | |
204 register int *idataptr = (int*)dataptr; | |
205 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
206 /* WARNING: we do the same permutation as MMX idct to simplify the |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
207 video core */ |
0 | 208 d0 = dataptr[0]; |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
209 d2 = dataptr[1]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
210 d4 = dataptr[2]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
211 d6 = dataptr[3]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
212 d1 = dataptr[4]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
213 d3 = dataptr[5]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
214 d5 = dataptr[6]; |
0 | 215 d7 = dataptr[7]; |
216 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
217 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { |
0 | 218 /* AC terms all zero */ |
219 if (d0) { | |
220 /* Compute a 32 bit value to assign. */ | |
221 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
222 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
223 | |
224 idataptr[0] = v; | |
225 idataptr[1] = v; | |
226 idataptr[2] = v; | |
227 idataptr[3] = v; | |
228 } | |
229 | |
230 dataptr += DCTSIZE; /* advance pointer to next row */ | |
231 continue; | |
232 } | |
233 | |
234 /* Even part: reverse the even part of the forward DCT. */ | |
235 /* The rotator is sqrt(2)*c(-6). */ | |
236 { | |
237 if (d6) { | |
238 if (d2) { | |
239 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
240 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
241 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
242 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
243 | |
244 tmp0 = (d0 + d4) << CONST_BITS; | |
245 tmp1 = (d0 - d4) << CONST_BITS; | |
246 | |
247 tmp10 = tmp0 + tmp3; | |
248 tmp13 = tmp0 - tmp3; | |
249 tmp11 = tmp1 + tmp2; | |
250 tmp12 = tmp1 - tmp2; | |
251 } else { | |
252 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
253 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
254 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
255 | |
256 tmp0 = (d0 + d4) << CONST_BITS; | |
257 tmp1 = (d0 - d4) << CONST_BITS; | |
258 | |
259 tmp10 = tmp0 + tmp3; | |
260 tmp13 = tmp0 - tmp3; | |
261 tmp11 = tmp1 + tmp2; | |
262 tmp12 = tmp1 - tmp2; | |
263 } | |
2263 | 264 } else { |
0 | 265 if (d2) { |
266 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
267 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
268 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
269 | |
270 tmp0 = (d0 + d4) << CONST_BITS; | |
271 tmp1 = (d0 - d4) << CONST_BITS; | |
272 | |
273 tmp10 = tmp0 + tmp3; | |
274 tmp13 = tmp0 - tmp3; | |
275 tmp11 = tmp1 + tmp2; | |
276 tmp12 = tmp1 - tmp2; | |
277 } else { | |
278 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
279 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
280 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
281 } | |
282 } | |
283 | |
284 /* Odd part per figure 8; the matrix is unitary and hence its | |
285 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
286 */ | |
287 | |
288 if (d7) { | |
289 if (d5) { | |
290 if (d3) { | |
291 if (d1) { | |
292 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
293 z1 = d7 + d1; | |
294 z2 = d5 + d3; | |
295 z3 = d7 + d3; | |
296 z4 = d5 + d1; | |
297 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
298 | |
299 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
300 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
301 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
302 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
303 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
304 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
305 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
306 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
307 | |
308 z3 += z5; | |
309 z4 += z5; | |
310 | |
311 tmp0 += z1 + z3; | |
312 tmp1 += z2 + z4; | |
313 tmp2 += z2 + z3; | |
314 tmp3 += z1 + z4; | |
315 } else { | |
316 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
317 z2 = d5 + d3; | |
318 z3 = d7 + d3; | |
319 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
320 | |
321 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
322 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
323 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
324 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
325 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
326 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
327 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
328 | |
329 z3 += z5; | |
330 z4 += z5; | |
331 | |
332 tmp0 += z1 + z3; | |
333 tmp1 += z2 + z4; | |
334 tmp2 += z2 + z3; | |
335 tmp3 = z1 + z4; | |
336 } | |
337 } else { | |
338 if (d1) { | |
339 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
340 z1 = d7 + d1; | |
341 z4 = d5 + d1; | |
342 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); | |
343 | |
344 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
345 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
346 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
347 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
348 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
349 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
350 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
351 | |
352 z3 += z5; | |
353 z4 += z5; | |
354 | |
355 tmp0 += z1 + z3; | |
356 tmp1 += z2 + z4; | |
357 tmp2 = z2 + z3; | |
358 tmp3 += z1 + z4; | |
359 } else { | |
360 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
361 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
362 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
363 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
364 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
365 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
366 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
367 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
368 | |
369 z3 += z5; | |
370 z4 += z5; | |
371 | |
372 tmp0 += z3; | |
373 tmp1 += z4; | |
374 tmp2 = z2 + z3; | |
375 tmp3 = z1 + z4; | |
376 } | |
377 } | |
378 } else { | |
379 if (d3) { | |
380 if (d1) { | |
381 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
382 z1 = d7 + d1; | |
383 z3 = d7 + d3; | |
384 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
385 | |
386 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
387 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
388 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
389 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
390 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
391 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
392 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
393 | |
394 z3 += z5; | |
395 z4 += z5; | |
396 | |
397 tmp0 += z1 + z3; | |
398 tmp1 = z2 + z4; | |
399 tmp2 += z2 + z3; | |
400 tmp3 += z1 + z4; | |
401 } else { | |
402 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
403 z3 = d7 + d3; | |
404 | |
405 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
406 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
407 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
408 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
409 z5 = MULTIPLY(z3, FIX_1_175875602); | |
410 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
411 | |
412 tmp0 += z3; | |
413 tmp1 = z2 + z5; | |
414 tmp2 += z3; | |
415 tmp3 = z1 + z5; | |
416 } | |
417 } else { | |
418 if (d1) { | |
419 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
420 z1 = d7 + d1; | |
421 z5 = MULTIPLY(z1, FIX_1_175875602); | |
422 | |
423 z1 = MULTIPLY(z1, FIX_0_275899380); | |
424 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
425 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
426 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
427 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
428 | |
429 tmp0 += z1; | |
430 tmp1 = z4 + z5; | |
431 tmp2 = z3 + z5; | |
432 tmp3 += z1; | |
433 } else { | |
434 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
435 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
436 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
437 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
438 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
439 } | |
440 } | |
441 } | |
442 } else { | |
443 if (d5) { | |
444 if (d3) { | |
445 if (d1) { | |
446 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
447 z2 = d5 + d3; | |
448 z4 = d5 + d1; | |
449 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
450 | |
451 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
452 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
453 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
454 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
455 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
456 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
457 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
458 | |
459 z3 += z5; | |
460 z4 += z5; | |
461 | |
462 tmp0 = z1 + z3; | |
463 tmp1 += z2 + z4; | |
464 tmp2 += z2 + z3; | |
465 tmp3 += z1 + z4; | |
466 } else { | |
467 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
468 z2 = d5 + d3; | |
469 | |
470 z5 = MULTIPLY(z2, FIX_1_175875602); | |
471 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
472 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
473 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
474 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
475 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
476 | |
477 tmp0 = z3 + z5; | |
478 tmp1 += z2; | |
479 tmp2 += z2; | |
480 tmp3 = z4 + z5; | |
481 } | |
482 } else { | |
483 if (d1) { | |
484 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
485 z4 = d5 + d1; | |
486 | |
487 z5 = MULTIPLY(z4, FIX_1_175875602); | |
488 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
489 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
490 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
491 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
492 z4 = MULTIPLY(z4, FIX_0_785694958); | |
493 | |
494 tmp0 = z1 + z5; | |
495 tmp1 += z4; | |
496 tmp2 = z2 + z5; | |
497 tmp3 += z4; | |
498 } else { | |
499 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
500 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
501 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
502 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
503 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
504 } | |
505 } | |
506 } else { | |
507 if (d3) { | |
508 if (d1) { | |
509 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
510 z5 = d1 + d3; | |
511 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
512 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
513 z1 = MULTIPLY(d1, FIX_1_061594337); | |
514 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
515 z4 = MULTIPLY(z5, FIX_0_785694958); | |
516 z5 = MULTIPLY(z5, FIX_1_175875602); | |
517 | |
518 tmp0 = z1 - z4; | |
519 tmp1 = z2 + z4; | |
520 tmp2 += z5; | |
521 tmp3 += z5; | |
522 } else { | |
523 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
524 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
525 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
526 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
527 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
528 } | |
529 } else { | |
530 if (d1) { | |
531 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
532 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
533 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
534 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
535 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
536 } else { | |
537 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
538 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
539 } | |
540 } | |
541 } | |
542 } | |
543 } | |
544 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
545 | |
546 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
547 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
548 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
549 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
550 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
551 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
552 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
553 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
554 | |
555 dataptr += DCTSIZE; /* advance pointer to next row */ | |
556 } | |
557 | |
558 /* Pass 2: process columns. */ | |
559 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
560 /* and also undo the PASS1_BITS scaling. */ | |
561 | |
562 dataptr = data; | |
563 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
564 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
565 * However, the row calculation has created many nonzero AC terms, so the | |
566 * simplification applies less often (typically 5% to 10% of the time). | |
567 * On machines with very fast multiplication, it's possible that the | |
568 * test takes more time than it's worth. In that case this section | |
569 * may be commented out. | |
570 */ | |
571 | |
572 d0 = dataptr[DCTSIZE*0]; | |
573 d1 = dataptr[DCTSIZE*1]; | |
574 d2 = dataptr[DCTSIZE*2]; | |
575 d3 = dataptr[DCTSIZE*3]; | |
576 d4 = dataptr[DCTSIZE*4]; | |
577 d5 = dataptr[DCTSIZE*5]; | |
578 d6 = dataptr[DCTSIZE*6]; | |
579 d7 = dataptr[DCTSIZE*7]; | |
580 | |
581 /* Even part: reverse the even part of the forward DCT. */ | |
582 /* The rotator is sqrt(2)*c(-6). */ | |
583 if (d6) { | |
584 if (d2) { | |
585 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
586 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
587 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
588 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
589 | |
590 tmp0 = (d0 + d4) << CONST_BITS; | |
591 tmp1 = (d0 - d4) << CONST_BITS; | |
592 | |
593 tmp10 = tmp0 + tmp3; | |
594 tmp13 = tmp0 - tmp3; | |
595 tmp11 = tmp1 + tmp2; | |
596 tmp12 = tmp1 - tmp2; | |
597 } else { | |
598 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
599 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
600 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
601 | |
602 tmp0 = (d0 + d4) << CONST_BITS; | |
603 tmp1 = (d0 - d4) << CONST_BITS; | |
604 | |
605 tmp10 = tmp0 + tmp3; | |
606 tmp13 = tmp0 - tmp3; | |
607 tmp11 = tmp1 + tmp2; | |
608 tmp12 = tmp1 - tmp2; | |
609 } | |
2263 | 610 } else { |
0 | 611 if (d2) { |
612 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
613 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
614 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
615 | |
616 tmp0 = (d0 + d4) << CONST_BITS; | |
617 tmp1 = (d0 - d4) << CONST_BITS; | |
618 | |
619 tmp10 = tmp0 + tmp3; | |
620 tmp13 = tmp0 - tmp3; | |
621 tmp11 = tmp1 + tmp2; | |
622 tmp12 = tmp1 - tmp2; | |
623 } else { | |
624 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
625 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
626 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
627 } | |
628 } | |
629 | |
630 /* Odd part per figure 8; the matrix is unitary and hence its | |
631 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
632 */ | |
633 if (d7) { | |
634 if (d5) { | |
635 if (d3) { | |
636 if (d1) { | |
637 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
638 z1 = d7 + d1; | |
639 z2 = d5 + d3; | |
640 z3 = d7 + d3; | |
641 z4 = d5 + d1; | |
642 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
643 | |
644 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
645 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
646 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
647 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
648 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
649 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
650 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
651 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
652 | |
653 z3 += z5; | |
654 z4 += z5; | |
655 | |
656 tmp0 += z1 + z3; | |
657 tmp1 += z2 + z4; | |
658 tmp2 += z2 + z3; | |
659 tmp3 += z1 + z4; | |
660 } else { | |
661 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
662 z1 = d7; | |
663 z2 = d5 + d3; | |
664 z3 = d7 + d3; | |
665 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
666 | |
667 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
668 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
669 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
670 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
671 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
672 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
673 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
674 | |
675 z3 += z5; | |
676 z4 += z5; | |
677 | |
678 tmp0 += z1 + z3; | |
679 tmp1 += z2 + z4; | |
680 tmp2 += z2 + z3; | |
681 tmp3 = z1 + z4; | |
682 } | |
683 } else { | |
684 if (d1) { | |
685 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
686 z1 = d7 + d1; | |
687 z2 = d5; | |
688 z3 = d7; | |
689 z4 = d5 + d1; | |
690 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
691 | |
692 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
693 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
694 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
695 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
696 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
697 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
698 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
699 | |
700 z3 += z5; | |
701 z4 += z5; | |
702 | |
703 tmp0 += z1 + z3; | |
704 tmp1 += z2 + z4; | |
705 tmp2 = z2 + z3; | |
706 tmp3 += z1 + z4; | |
707 } else { | |
708 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
709 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
710 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
711 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
712 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
713 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
714 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
715 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
716 | |
717 z3 += z5; | |
718 z4 += z5; | |
719 | |
720 tmp0 += z3; | |
721 tmp1 += z4; | |
722 tmp2 = z2 + z3; | |
723 tmp3 = z1 + z4; | |
724 } | |
725 } | |
726 } else { | |
727 if (d3) { | |
728 if (d1) { | |
729 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
730 z1 = d7 + d1; | |
731 z3 = d7 + d3; | |
732 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
733 | |
734 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
735 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
736 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
737 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
738 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
739 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
740 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
741 | |
742 z3 += z5; | |
743 z4 += z5; | |
744 | |
745 tmp0 += z1 + z3; | |
746 tmp1 = z2 + z4; | |
747 tmp2 += z2 + z3; | |
748 tmp3 += z1 + z4; | |
749 } else { | |
750 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
751 z3 = d7 + d3; | |
752 | |
753 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
754 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
755 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
756 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
757 z5 = MULTIPLY(z3, FIX_1_175875602); | |
758 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
759 | |
760 tmp0 += z3; | |
761 tmp1 = z2 + z5; | |
762 tmp2 += z3; | |
763 tmp3 = z1 + z5; | |
764 } | |
765 } else { | |
766 if (d1) { | |
767 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
768 z1 = d7 + d1; | |
769 z5 = MULTIPLY(z1, FIX_1_175875602); | |
770 | |
771 z1 = MULTIPLY(z1, FIX_0_275899380); | |
772 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
773 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
774 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
775 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
776 | |
777 tmp0 += z1; | |
778 tmp1 = z4 + z5; | |
779 tmp2 = z3 + z5; | |
780 tmp3 += z1; | |
781 } else { | |
782 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
783 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
784 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
785 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
786 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
787 } | |
788 } | |
789 } | |
790 } else { | |
791 if (d5) { | |
792 if (d3) { | |
793 if (d1) { | |
794 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
795 z2 = d5 + d3; | |
796 z4 = d5 + d1; | |
797 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
798 | |
799 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
800 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
801 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
802 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
803 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
804 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
805 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
806 | |
807 z3 += z5; | |
808 z4 += z5; | |
809 | |
810 tmp0 = z1 + z3; | |
811 tmp1 += z2 + z4; | |
812 tmp2 += z2 + z3; | |
813 tmp3 += z1 + z4; | |
814 } else { | |
815 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
816 z2 = d5 + d3; | |
817 | |
818 z5 = MULTIPLY(z2, FIX_1_175875602); | |
819 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
820 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
821 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
822 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
823 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
824 | |
825 tmp0 = z3 + z5; | |
826 tmp1 += z2; | |
827 tmp2 += z2; | |
828 tmp3 = z4 + z5; | |
829 } | |
830 } else { | |
831 if (d1) { | |
832 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
833 z4 = d5 + d1; | |
834 | |
835 z5 = MULTIPLY(z4, FIX_1_175875602); | |
836 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
837 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
838 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
839 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
840 z4 = MULTIPLY(z4, FIX_0_785694958); | |
841 | |
842 tmp0 = z1 + z5; | |
843 tmp1 += z4; | |
844 tmp2 = z2 + z5; | |
845 tmp3 += z4; | |
846 } else { | |
847 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
848 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
849 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
850 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
851 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
852 } | |
853 } | |
854 } else { | |
855 if (d3) { | |
856 if (d1) { | |
857 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
858 z5 = d1 + d3; | |
859 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
860 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
861 z1 = MULTIPLY(d1, FIX_1_061594337); | |
862 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
863 z4 = MULTIPLY(z5, FIX_0_785694958); | |
864 z5 = MULTIPLY(z5, FIX_1_175875602); | |
865 | |
866 tmp0 = z1 - z4; | |
867 tmp1 = z2 + z4; | |
868 tmp2 += z5; | |
869 tmp3 += z5; | |
870 } else { | |
871 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
872 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
873 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
874 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
875 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
876 } | |
877 } else { | |
878 if (d1) { | |
879 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
880 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
881 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
882 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
883 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
884 } else { | |
885 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
886 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
887 } | |
888 } | |
889 } | |
890 } | |
891 | |
892 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
893 | |
894 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, | |
895 CONST_BITS+PASS1_BITS+3); | |
896 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, | |
897 CONST_BITS+PASS1_BITS+3); | |
898 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, | |
899 CONST_BITS+PASS1_BITS+3); | |
900 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, | |
901 CONST_BITS+PASS1_BITS+3); | |
902 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, | |
903 CONST_BITS+PASS1_BITS+3); | |
904 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, | |
905 CONST_BITS+PASS1_BITS+3); | |
906 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, | |
907 CONST_BITS+PASS1_BITS+3); | |
908 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, | |
909 CONST_BITS+PASS1_BITS+3); | |
910 | |
911 dataptr++; /* advance pointer to next column */ | |
912 } | |
913 } | |
914 | |
2256 | 915 #undef DCTSIZE |
916 #define DCTSIZE 4 | |
917 #define DCTSTRIDE 8 | |
918 | |
919 void j_rev_dct4(DCTBLOCK data) | |
920 { | |
921 int32_t tmp0, tmp1, tmp2, tmp3; | |
922 int32_t tmp10, tmp11, tmp12, tmp13; | |
923 int32_t z1; | |
924 int32_t d0, d2, d4, d6; | |
925 register DCTELEM *dataptr; | |
926 int rowctr; | |
2262 | 927 |
2256 | 928 /* Pass 1: process rows. */ |
929 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
930 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
931 | |
2262 | 932 data[0] += 4; |
933 | |
2256 | 934 dataptr = data; |
935 | |
936 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
937 /* Due to quantization, we will usually find that many of the input | |
938 * coefficients are zero, especially the AC terms. We can exploit this | |
939 * by short-circuiting the IDCT calculation for any row in which all | |
940 * the AC terms are zero. In that case each output is equal to the | |
941 * DC coefficient (with scale factor as needed). | |
942 * With typical images and quantization tables, half or more of the | |
943 * row DCT calculations can be simplified this way. | |
944 */ | |
945 | |
946 register int *idataptr = (int*)dataptr; | |
947 | |
948 d0 = dataptr[0]; | |
949 d2 = dataptr[1]; | |
950 d4 = dataptr[2]; | |
951 d6 = dataptr[3]; | |
952 | |
953 if ((d2 | d4 | d6) == 0) { | |
954 /* AC terms all zero */ | |
955 if (d0) { | |
956 /* Compute a 32 bit value to assign. */ | |
957 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
958 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
959 | |
960 idataptr[0] = v; | |
961 idataptr[1] = v; | |
962 } | |
963 | |
964 dataptr += DCTSTRIDE; /* advance pointer to next row */ | |
965 continue; | |
966 } | |
2262 | 967 |
2256 | 968 /* Even part: reverse the even part of the forward DCT. */ |
969 /* The rotator is sqrt(2)*c(-6). */ | |
970 if (d6) { | |
971 if (d2) { | |
972 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
973 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
974 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
975 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
976 | |
977 tmp0 = (d0 + d4) << CONST_BITS; | |
978 tmp1 = (d0 - d4) << CONST_BITS; | |
979 | |
980 tmp10 = tmp0 + tmp3; | |
981 tmp13 = tmp0 - tmp3; | |
982 tmp11 = tmp1 + tmp2; | |
983 tmp12 = tmp1 - tmp2; | |
984 } else { | |
985 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
986 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
987 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
988 | |
989 tmp0 = (d0 + d4) << CONST_BITS; | |
990 tmp1 = (d0 - d4) << CONST_BITS; | |
991 | |
992 tmp10 = tmp0 + tmp3; | |
993 tmp13 = tmp0 - tmp3; | |
994 tmp11 = tmp1 + tmp2; | |
995 tmp12 = tmp1 - tmp2; | |
996 } | |
2262 | 997 } else { |
2256 | 998 if (d2) { |
999 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
1000 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
1001 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
1002 | |
1003 tmp0 = (d0 + d4) << CONST_BITS; | |
1004 tmp1 = (d0 - d4) << CONST_BITS; | |
1005 | |
1006 tmp10 = tmp0 + tmp3; | |
1007 tmp13 = tmp0 - tmp3; | |
1008 tmp11 = tmp1 + tmp2; | |
1009 tmp12 = tmp1 - tmp2; | |
1010 } else { | |
1011 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
1012 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
1013 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
1014 } | |
1015 } | |
1016 | |
1017 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1018 | |
1019 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | |
1020 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | |
1021 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | |
1022 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | |
1023 | |
1024 dataptr += DCTSTRIDE; /* advance pointer to next row */ | |
1025 } | |
1026 | |
1027 /* Pass 2: process columns. */ | |
1028 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
1029 /* and also undo the PASS1_BITS scaling. */ | |
1030 | |
1031 dataptr = data; | |
1032 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
1033 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
1034 * However, the row calculation has created many nonzero AC terms, so the | |
1035 * simplification applies less often (typically 5% to 10% of the time). | |
1036 * On machines with very fast multiplication, it's possible that the | |
1037 * test takes more time than it's worth. In that case this section | |
1038 * may be commented out. | |
1039 */ | |
1040 | |
1041 d0 = dataptr[DCTSTRIDE*0]; | |
1042 d2 = dataptr[DCTSTRIDE*1]; | |
1043 d4 = dataptr[DCTSTRIDE*2]; | |
1044 d6 = dataptr[DCTSTRIDE*3]; | |
1045 | |
1046 /* Even part: reverse the even part of the forward DCT. */ | |
1047 /* The rotator is sqrt(2)*c(-6). */ | |
1048 if (d6) { | |
1049 if (d2) { | |
1050 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
1051 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
1052 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
1053 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
1054 | |
1055 tmp0 = (d0 + d4) << CONST_BITS; | |
1056 tmp1 = (d0 - d4) << CONST_BITS; | |
1057 | |
1058 tmp10 = tmp0 + tmp3; | |
1059 tmp13 = tmp0 - tmp3; | |
1060 tmp11 = tmp1 + tmp2; | |
1061 tmp12 = tmp1 - tmp2; | |
1062 } else { | |
1063 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
1064 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
1065 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
1066 | |
1067 tmp0 = (d0 + d4) << CONST_BITS; | |
1068 tmp1 = (d0 - d4) << CONST_BITS; | |
1069 | |
1070 tmp10 = tmp0 + tmp3; | |
1071 tmp13 = tmp0 - tmp3; | |
1072 tmp11 = tmp1 + tmp2; | |
1073 tmp12 = tmp1 - tmp2; | |
1074 } | |
2262 | 1075 } else { |
2256 | 1076 if (d2) { |
1077 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
1078 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
1079 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
1080 | |
1081 tmp0 = (d0 + d4) << CONST_BITS; | |
1082 tmp1 = (d0 - d4) << CONST_BITS; | |
1083 | |
1084 tmp10 = tmp0 + tmp3; | |
1085 tmp13 = tmp0 - tmp3; | |
1086 tmp11 = tmp1 + tmp2; | |
1087 tmp12 = tmp1 - tmp2; | |
1088 } else { | |
1089 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
1090 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
1091 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
1092 } | |
1093 } | |
1094 | |
1095 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1096 | |
2262 | 1097 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); |
1098 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); | |
1099 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); | |
1100 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); | |
2256 | 1101 |
1102 dataptr++; /* advance pointer to next column */ | |
1103 } | |
1104 } | |
1105 | |
2257 | 1106 void j_rev_dct2(DCTBLOCK data){ |
1107 int d00, d01, d10, d11; | |
1108 | |
1109 data[0] += 4; | |
1110 d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; | |
1111 d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; | |
1112 d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; | |
1113 d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; | |
1114 | |
1115 data[0+0*DCTSTRIDE]= (d00 + d10)>>3; | |
1116 data[1+0*DCTSTRIDE]= (d01 + d11)>>3; | |
1117 data[0+1*DCTSTRIDE]= (d00 - d10)>>3; | |
1118 data[1+1*DCTSTRIDE]= (d01 - d11)>>3; | |
1119 } | |
2256 | 1120 |
2259 | 1121 void j_rev_dct1(DCTBLOCK data){ |
1122 data[0] = (data[0] + 4)>>3; | |
1123 } | |
1124 | |
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1125 #undef FIX |
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1126 #undef CONST_BITS |