Mercurial > libavcodec.hg
annotate jrevdct.c @ 478:055d9ac1584d libavcodec
use ff_idct_put/add()
author | bellard |
---|---|
date | Wed, 05 Jun 2002 18:36:03 +0000 |
parents | 000aeeac27a2 |
children | b32afefe7d33 |
rev | line source |
---|---|
0 | 1 /* |
2 * jrevdct.c | |
3 * | |
4 * Copyright (C) 1991, 1992, Thomas G. Lane. | |
5 * This file is part of the Independent JPEG Group's software. | |
6 * For conditions of distribution and use, see the accompanying README file. | |
7 * | |
8 * This file contains the basic inverse-DCT transformation subroutine. | |
9 * | |
10 * This implementation is based on an algorithm described in | |
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
14 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
15 * We use their alternate method with 12 multiplies and 32 adds. | |
16 * The advantage of this method is that no data path contains more than one | |
17 * multiplication; this allows a very simple and accurate implementation in | |
18 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
19 * | |
20 * I've made lots of modifications to attempt to take advantage of the | |
21 * sparse nature of the DCT matrices we're getting. Although the logic | |
22 * is cumbersome, it's straightforward and the resulting code is much | |
23 * faster. | |
24 * | |
25 * A better way to do this would be to pass in the DCT block as a sparse | |
26 * matrix, perhaps with the different cases encoded. | |
27 */ | |
28 #include "common.h" | |
29 #include "dsputil.h" | |
30 | |
31 #define EIGHT_BIT_SAMPLES | |
32 | |
33 #define DCTSIZE 8 | |
34 #define DCTSIZE2 64 | |
35 | |
36 #define GLOBAL | |
37 | |
38 #define RIGHT_SHIFT(x, n) ((x) >> (n)) | |
39 | |
40 typedef DCTELEM DCTBLOCK[DCTSIZE2]; | |
41 | |
42 #define CONST_BITS 13 | |
43 | |
44 /* | |
45 * This routine is specialized to the case DCTSIZE = 8. | |
46 */ | |
47 | |
48 #if DCTSIZE != 8 | |
49 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | |
50 #endif | |
51 | |
52 | |
53 /* | |
54 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT | |
55 * on each column. Direct algorithms are also available, but they are | |
56 * much more complex and seem not to be any faster when reduced to code. | |
57 * | |
58 * The details of the scaling are as follows: | |
59 * | |
60 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
61 * larger than the true IDCT outputs. The final outputs are therefore | |
62 * a factor of N larger than desired; since N=8 this can be cured by | |
63 * a simple right shift at the end of the algorithm. The advantage of | |
64 * this arrangement is that we save two multiplications per 1-D IDCT, | |
65 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
66 * | |
67 * We have to do addition and subtraction of the integer inputs, which | |
68 * is no problem, and multiplication by fractional constants, which is | |
69 * a problem to do in integer arithmetic. We multiply all the constants | |
70 * by CONST_SCALE and convert them to integer constants (thus retaining | |
71 * CONST_BITS bits of precision in the constants). After doing a | |
72 * multiplication we have to divide the product by CONST_SCALE, with proper | |
73 * rounding, to produce the correct output. This division can be done | |
74 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
75 * as long as possible so that partial sums can be added together with | |
76 * full fractional precision. | |
77 * | |
78 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
79 * they are represented to better-than-integral precision. These outputs | |
80 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
81 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
82 * intermediate int32 array would be needed.) | |
83 * | |
84 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
85 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
86 * shows that the values given below are the most effective. | |
87 */ | |
88 | |
89 #ifdef EIGHT_BIT_SAMPLES | |
90 #define PASS1_BITS 2 | |
91 #else | |
92 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |
93 #endif | |
94 | |
95 #define ONE ((INT32) 1) | |
96 | |
97 #define CONST_SCALE (ONE << CONST_BITS) | |
98 | |
99 /* Convert a positive real constant to an integer scaled by CONST_SCALE. | |
100 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, | |
101 * you will pay a significant penalty in run time. In that case, figure | |
102 * the correct integer constant values and insert them by hand. | |
103 */ | |
104 | |
105 /* FIX is no longer used: all constants are precomputed (see the FIX_* values below). */ | |
106 #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) | |
107 | |
108 /* Descale and correctly round an INT32 value that's scaled by N bits. | |
109 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding | |
110 * the fudge factor is correct for either sign of X. | |
111 */ | |
112 | |
113 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) | |
114 | |
115 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. | |
116 * For 8-bit samples with the recommended scaling, all the variable | |
117 * and constant values involved are no more than 16 bits wide, so a | |
118 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; | |
119 * this provides a useful speedup on many machines. | |
120 * There is no way to specify a 16x16->32 multiply in portable C, but | |
121 * some C compilers will do the right thing if you provide the correct | |
122 * combination of casts. | |
123 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. | |
124 */ | |
125 | |
126 #ifdef EIGHT_BIT_SAMPLES | |
127 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ | |
128 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) | |
129 #endif | |
130 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ | |
131 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) | |
132 #endif | |
133 #endif | |
134 | |
135 #ifndef MULTIPLY /* default definition */ | |
136 #define MULTIPLY(var,const) ((var) * (const)) | |
137 #endif | |
138 | |
139 | |
140 /* | |
141 Unlike our decoder, where the FIX constants are approximated, we need exact | |
142 values here; otherwise successive P-frames drift too much under reference-frame coding. | |
143 */ | |
144 #define FIX_0_211164243 1730 | |
145 #define FIX_0_275899380 2260 | |
146 #define FIX_0_298631336 2446 | |
147 #define FIX_0_390180644 3196 | |
148 #define FIX_0_509795579 4176 | |
149 #define FIX_0_541196100 4433 | |
150 #define FIX_0_601344887 4926 | |
151 #define FIX_0_765366865 6270 | |
152 #define FIX_0_785694958 6436 | |
153 #define FIX_0_899976223 7373 | |
154 #define FIX_1_061594337 8697 | |
155 #define FIX_1_111140466 9102 | |
156 #define FIX_1_175875602 9633 | |
157 #define FIX_1_306562965 10703 | |
158 #define FIX_1_387039845 11363 | |
159 #define FIX_1_451774981 11893 | |
160 #define FIX_1_501321110 12299 | |
161 #define FIX_1_662939225 13623 | |
162 #define FIX_1_847759065 15137 | |
163 #define FIX_1_961570560 16069 | |
164 #define FIX_2_053119869 16819 | |
165 #define FIX_2_172734803 17799 | |
166 #define FIX_2_562915447 20995 | |
167 #define FIX_3_072711026 25172 | |
168 | |
169 /* | |
170 * Perform the inverse DCT on one block of coefficients. | |
171 */ | |
172 | |
173 void j_rev_dct(DCTBLOCK data) | |
174 { | |
175 INT32 tmp0, tmp1, tmp2, tmp3; | |
176 INT32 tmp10, tmp11, tmp12, tmp13; | |
177 INT32 z1, z2, z3, z4, z5; | |
178 INT32 d0, d1, d2, d3, d4, d5, d6, d7; | |
179 register DCTELEM *dataptr; | |
180 int rowctr; | |
181 | |
182 /* Pass 1: process rows. */ | |
183 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
184 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
185 | |
186 dataptr = data; | |
187 | |
188 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
189 /* Due to quantization, we will usually find that many of the input | |
190 * coefficients are zero, especially the AC terms. We can exploit this | |
191 * by short-circuiting the IDCT calculation for any row in which all | |
192 * the AC terms are zero. In that case each output is equal to the | |
193 * DC coefficient (with scale factor as needed). | |
194 * With typical images and quantization tables, half or more of the | |
195 * row DCT calculations can be simplified this way. | |
196 */ | |
197 | |
198 register int *idataptr = (int*)dataptr; | |
199 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
200 /* WARNING: we do the same permutation as MMX idct to simplify the |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
201 video core */ |
0 | 202 d0 = dataptr[0]; |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
203 d2 = dataptr[1]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
204 d4 = dataptr[2]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
205 d6 = dataptr[3]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
206 d1 = dataptr[4]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
207 d3 = dataptr[5]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
208 d5 = dataptr[6]; |
0 | 209 d7 = dataptr[7]; |
210 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
211 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { |
0 | 212 /* AC terms all zero */ |
213 if (d0) { | |
214 /* Compute a 32 bit value to assign. */ | |
215 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
216 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
217 | |
218 idataptr[0] = v; | |
219 idataptr[1] = v; | |
220 idataptr[2] = v; | |
221 idataptr[3] = v; | |
222 } | |
223 | |
224 dataptr += DCTSIZE; /* advance pointer to next row */ | |
225 continue; | |
226 } | |
227 | |
228 /* Even part: reverse the even part of the forward DCT. */ | |
229 /* The rotator is sqrt(2)*c(-6). */ | |
230 { | |
231 if (d6) { | |
232 if (d4) { | |
233 if (d2) { | |
234 if (d0) { | |
235 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
236 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
237 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
238 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
239 | |
240 tmp0 = (d0 + d4) << CONST_BITS; | |
241 tmp1 = (d0 - d4) << CONST_BITS; | |
242 | |
243 tmp10 = tmp0 + tmp3; | |
244 tmp13 = tmp0 - tmp3; | |
245 tmp11 = tmp1 + tmp2; | |
246 tmp12 = tmp1 - tmp2; | |
247 } else { | |
248 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ | |
249 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
250 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
251 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
252 | |
253 tmp0 = d4 << CONST_BITS; | |
254 | |
255 tmp10 = tmp0 + tmp3; | |
256 tmp13 = tmp0 - tmp3; | |
257 tmp11 = tmp2 - tmp0; | |
258 tmp12 = -(tmp0 + tmp2); | |
259 } | |
260 } else { | |
261 if (d0) { | |
262 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
263 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
264 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
265 | |
266 tmp0 = (d0 + d4) << CONST_BITS; | |
267 tmp1 = (d0 - d4) << CONST_BITS; | |
268 | |
269 tmp10 = tmp0 + tmp3; | |
270 tmp13 = tmp0 - tmp3; | |
271 tmp11 = tmp1 + tmp2; | |
272 tmp12 = tmp1 - tmp2; | |
273 } else { | |
274 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ | |
275 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
276 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
277 | |
278 tmp0 = d4 << CONST_BITS; | |
279 | |
280 tmp10 = tmp0 + tmp3; | |
281 tmp13 = tmp0 - tmp3; | |
282 tmp11 = tmp2 - tmp0; | |
283 tmp12 = -(tmp0 + tmp2); | |
284 } | |
285 } | |
286 } else { | |
287 if (d2) { | |
288 if (d0) { | |
289 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ | |
290 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
291 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
292 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
293 | |
294 tmp0 = d0 << CONST_BITS; | |
295 | |
296 tmp10 = tmp0 + tmp3; | |
297 tmp13 = tmp0 - tmp3; | |
298 tmp11 = tmp0 + tmp2; | |
299 tmp12 = tmp0 - tmp2; | |
300 } else { | |
301 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ | |
302 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
303 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
304 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
305 | |
306 tmp10 = tmp3; | |
307 tmp13 = -tmp3; | |
308 tmp11 = tmp2; | |
309 tmp12 = -tmp2; | |
310 } | |
311 } else { | |
312 if (d0) { | |
313 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ | |
314 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
315 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
316 | |
317 tmp0 = d0 << CONST_BITS; | |
318 | |
319 tmp10 = tmp0 + tmp3; | |
320 tmp13 = tmp0 - tmp3; | |
321 tmp11 = tmp0 + tmp2; | |
322 tmp12 = tmp0 - tmp2; | |
323 } else { | |
324 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ | |
325 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
326 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
327 | |
328 tmp10 = tmp3; | |
329 tmp13 = -tmp3; | |
330 tmp11 = tmp2; | |
331 tmp12 = -tmp2; | |
332 } | |
333 } | |
334 } | |
335 } else { | |
336 if (d4) { | |
337 if (d2) { | |
338 if (d0) { | |
339 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
340 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
341 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
342 | |
343 tmp0 = (d0 + d4) << CONST_BITS; | |
344 tmp1 = (d0 - d4) << CONST_BITS; | |
345 | |
346 tmp10 = tmp0 + tmp3; | |
347 tmp13 = tmp0 - tmp3; | |
348 tmp11 = tmp1 + tmp2; | |
349 tmp12 = tmp1 - tmp2; | |
350 } else { | |
351 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ | |
352 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
353 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
354 | |
355 tmp0 = d4 << CONST_BITS; | |
356 | |
357 tmp10 = tmp0 + tmp3; | |
358 tmp13 = tmp0 - tmp3; | |
359 tmp11 = tmp2 - tmp0; | |
360 tmp12 = -(tmp0 + tmp2); | |
361 } | |
362 } else { | |
363 if (d0) { | |
364 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
365 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
366 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
367 } else { | |
368 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ | |
369 tmp10 = tmp13 = d4 << CONST_BITS; | |
370 tmp11 = tmp12 = -tmp10; | |
371 } | |
372 } | |
373 } else { | |
374 if (d2) { | |
375 if (d0) { | |
376 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ | |
377 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
378 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
379 | |
380 tmp0 = d0 << CONST_BITS; | |
381 | |
382 tmp10 = tmp0 + tmp3; | |
383 tmp13 = tmp0 - tmp3; | |
384 tmp11 = tmp0 + tmp2; | |
385 tmp12 = tmp0 - tmp2; | |
386 } else { | |
387 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ | |
388 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
389 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
390 | |
391 tmp10 = tmp3; | |
392 tmp13 = -tmp3; | |
393 tmp11 = tmp2; | |
394 tmp12 = -tmp2; | |
395 } | |
396 } else { | |
397 if (d0) { | |
398 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ | |
399 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; | |
400 } else { | |
401 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ | |
402 tmp10 = tmp13 = tmp11 = tmp12 = 0; | |
403 } | |
404 } | |
405 } | |
406 } | |
407 | |
408 /* Odd part per figure 8; the matrix is unitary and hence its | |
409 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
410 */ | |
411 | |
412 if (d7) { | |
413 if (d5) { | |
414 if (d3) { | |
415 if (d1) { | |
416 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
417 z1 = d7 + d1; | |
418 z2 = d5 + d3; | |
419 z3 = d7 + d3; | |
420 z4 = d5 + d1; | |
421 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
422 | |
423 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
424 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
425 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
426 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
427 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
428 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
429 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
430 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
431 | |
432 z3 += z5; | |
433 z4 += z5; | |
434 | |
435 tmp0 += z1 + z3; | |
436 tmp1 += z2 + z4; | |
437 tmp2 += z2 + z3; | |
438 tmp3 += z1 + z4; | |
439 } else { | |
440 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
441 z2 = d5 + d3; | |
442 z3 = d7 + d3; | |
443 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
444 | |
445 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
446 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
447 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
448 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
449 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
450 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
451 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
452 | |
453 z3 += z5; | |
454 z4 += z5; | |
455 | |
456 tmp0 += z1 + z3; | |
457 tmp1 += z2 + z4; | |
458 tmp2 += z2 + z3; | |
459 tmp3 = z1 + z4; | |
460 } | |
461 } else { | |
462 if (d1) { | |
463 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
464 z1 = d7 + d1; | |
465 z4 = d5 + d1; | |
466 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); | |
467 | |
468 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
469 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
470 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
471 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
472 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
473 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
474 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
475 | |
476 z3 += z5; | |
477 z4 += z5; | |
478 | |
479 tmp0 += z1 + z3; | |
480 tmp1 += z2 + z4; | |
481 tmp2 = z2 + z3; | |
482 tmp3 += z1 + z4; | |
483 } else { | |
484 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
485 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
486 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
487 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
488 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
489 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
490 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
491 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
492 | |
493 z3 += z5; | |
494 z4 += z5; | |
495 | |
496 tmp0 += z3; | |
497 tmp1 += z4; | |
498 tmp2 = z2 + z3; | |
499 tmp3 = z1 + z4; | |
500 } | |
501 } | |
502 } else { | |
503 if (d3) { | |
504 if (d1) { | |
505 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
506 z1 = d7 + d1; | |
507 z3 = d7 + d3; | |
508 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
509 | |
510 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
511 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
512 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
513 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
514 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
515 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
516 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
517 | |
518 z3 += z5; | |
519 z4 += z5; | |
520 | |
521 tmp0 += z1 + z3; | |
522 tmp1 = z2 + z4; | |
523 tmp2 += z2 + z3; | |
524 tmp3 += z1 + z4; | |
525 } else { | |
526 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
527 z3 = d7 + d3; | |
528 | |
529 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
530 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
531 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
532 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
533 z5 = MULTIPLY(z3, FIX_1_175875602); | |
534 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
535 | |
536 tmp0 += z3; | |
537 tmp1 = z2 + z5; | |
538 tmp2 += z3; | |
539 tmp3 = z1 + z5; | |
540 } | |
541 } else { | |
542 if (d1) { | |
543 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
544 z1 = d7 + d1; | |
545 z5 = MULTIPLY(z1, FIX_1_175875602); | |
546 | |
547 z1 = MULTIPLY(z1, FIX_0_275899380); | |
548 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
549 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
550 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
551 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
552 | |
553 tmp0 += z1; | |
554 tmp1 = z4 + z5; | |
555 tmp2 = z3 + z5; | |
556 tmp3 += z1; | |
557 } else { | |
558 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
559 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
560 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
561 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
562 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
563 } | |
564 } | |
565 } | |
566 } else { | |
567 if (d5) { | |
568 if (d3) { | |
569 if (d1) { | |
570 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
571 z2 = d5 + d3; | |
572 z4 = d5 + d1; | |
573 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
574 | |
575 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
576 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
577 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
578 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
579 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
580 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
581 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
582 | |
583 z3 += z5; | |
584 z4 += z5; | |
585 | |
586 tmp0 = z1 + z3; | |
587 tmp1 += z2 + z4; | |
588 tmp2 += z2 + z3; | |
589 tmp3 += z1 + z4; | |
590 } else { | |
591 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
592 z2 = d5 + d3; | |
593 | |
594 z5 = MULTIPLY(z2, FIX_1_175875602); | |
595 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
596 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
597 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
598 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
599 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
600 | |
601 tmp0 = z3 + z5; | |
602 tmp1 += z2; | |
603 tmp2 += z2; | |
604 tmp3 = z4 + z5; | |
605 } | |
606 } else { | |
607 if (d1) { | |
608 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
609 z4 = d5 + d1; | |
610 | |
611 z5 = MULTIPLY(z4, FIX_1_175875602); | |
612 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
613 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
614 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
615 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
616 z4 = MULTIPLY(z4, FIX_0_785694958); | |
617 | |
618 tmp0 = z1 + z5; | |
619 tmp1 += z4; | |
620 tmp2 = z2 + z5; | |
621 tmp3 += z4; | |
622 } else { | |
623 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
624 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
625 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
626 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
627 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
628 } | |
629 } | |
630 } else { | |
631 if (d3) { | |
632 if (d1) { | |
633 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
634 z5 = d1 + d3; | |
635 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
636 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
637 z1 = MULTIPLY(d1, FIX_1_061594337); | |
638 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
639 z4 = MULTIPLY(z5, FIX_0_785694958); | |
640 z5 = MULTIPLY(z5, FIX_1_175875602); | |
641 | |
642 tmp0 = z1 - z4; | |
643 tmp1 = z2 + z4; | |
644 tmp2 += z5; | |
645 tmp3 += z5; | |
646 } else { | |
647 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
648 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
649 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
650 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
651 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
652 } | |
653 } else { | |
654 if (d1) { | |
655 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
656 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
657 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
658 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
659 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
660 } else { | |
661 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
662 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
663 } | |
664 } | |
665 } | |
666 } | |
667 } | |
668 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
669 | |
670 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
671 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
672 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
673 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
674 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
675 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
676 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
677 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
678 | |
679 dataptr += DCTSIZE; /* advance pointer to next row */ | |
680 } | |
681 | |
682 /* Pass 2: process columns. */ | |
683 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
684 /* and also undo the PASS1_BITS scaling. */ | |
685 | |
686 dataptr = data; | |
687 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
688 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
689 * However, the row calculation has created many nonzero AC terms, so the | |
690 * simplification applies less often (typically 5% to 10% of the time). | |
691 * On machines with very fast multiplication, it's possible that the | |
692 * test takes more time than it's worth. In that case this section | |
693 * may be commented out. | |
694 */ | |
695 | |
696 d0 = dataptr[DCTSIZE*0]; | |
697 d1 = dataptr[DCTSIZE*1]; | |
698 d2 = dataptr[DCTSIZE*2]; | |
699 d3 = dataptr[DCTSIZE*3]; | |
700 d4 = dataptr[DCTSIZE*4]; | |
701 d5 = dataptr[DCTSIZE*5]; | |
702 d6 = dataptr[DCTSIZE*6]; | |
703 d7 = dataptr[DCTSIZE*7]; | |
704 | |
705 /* Even part: reverse the even part of the forward DCT. */ | |
706 /* The rotator is sqrt(2)*c(-6). */ | |
707 if (d6) { | |
708 if (d4) { | |
709 if (d2) { | |
710 if (d0) { | |
711 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
712 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
713 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
714 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
715 | |
716 tmp0 = (d0 + d4) << CONST_BITS; | |
717 tmp1 = (d0 - d4) << CONST_BITS; | |
718 | |
719 tmp10 = tmp0 + tmp3; | |
720 tmp13 = tmp0 - tmp3; | |
721 tmp11 = tmp1 + tmp2; | |
722 tmp12 = tmp1 - tmp2; | |
723 } else { | |
724 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ | |
725 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
726 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
727 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
728 | |
729 tmp0 = d4 << CONST_BITS; | |
730 | |
731 tmp10 = tmp0 + tmp3; | |
732 tmp13 = tmp0 - tmp3; | |
733 tmp11 = tmp2 - tmp0; | |
734 tmp12 = -(tmp0 + tmp2); | |
735 } | |
736 } else { | |
737 if (d0) { | |
738 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
739 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
740 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
741 | |
742 tmp0 = (d0 + d4) << CONST_BITS; | |
743 tmp1 = (d0 - d4) << CONST_BITS; | |
744 | |
745 tmp10 = tmp0 + tmp3; | |
746 tmp13 = tmp0 - tmp3; | |
747 tmp11 = tmp1 + tmp2; | |
748 tmp12 = tmp1 - tmp2; | |
749 } else { | |
750 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ | |
751 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
752 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
753 | |
754 tmp0 = d4 << CONST_BITS; | |
755 | |
756 tmp10 = tmp0 + tmp3; | |
757 tmp13 = tmp0 - tmp3; | |
758 tmp11 = tmp2 - tmp0; | |
759 tmp12 = -(tmp0 + tmp2); | |
760 } | |
761 } | |
762 } else { | |
763 if (d2) { | |
764 if (d0) { | |
765 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ | |
766 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
767 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
768 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
769 | |
770 tmp0 = d0 << CONST_BITS; | |
771 | |
772 tmp10 = tmp0 + tmp3; | |
773 tmp13 = tmp0 - tmp3; | |
774 tmp11 = tmp0 + tmp2; | |
775 tmp12 = tmp0 - tmp2; | |
776 } else { | |
777 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ | |
778 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
779 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
780 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
781 | |
782 tmp10 = tmp3; | |
783 tmp13 = -tmp3; | |
784 tmp11 = tmp2; | |
785 tmp12 = -tmp2; | |
786 } | |
787 } else { | |
788 if (d0) { | |
789 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ | |
790 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
791 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
792 | |
793 tmp0 = d0 << CONST_BITS; | |
794 | |
795 tmp10 = tmp0 + tmp3; | |
796 tmp13 = tmp0 - tmp3; | |
797 tmp11 = tmp0 + tmp2; | |
798 tmp12 = tmp0 - tmp2; | |
799 } else { | |
800 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ | |
801 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
802 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
803 | |
804 tmp10 = tmp3; | |
805 tmp13 = -tmp3; | |
806 tmp11 = tmp2; | |
807 tmp12 = -tmp2; | |
808 } | |
809 } | |
810 } | |
811 } else { | |
812 if (d4) { | |
813 if (d2) { | |
814 if (d0) { | |
815 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
816 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
817 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
818 | |
819 tmp0 = (d0 + d4) << CONST_BITS; | |
820 tmp1 = (d0 - d4) << CONST_BITS; | |
821 | |
822 tmp10 = tmp0 + tmp3; | |
823 tmp13 = tmp0 - tmp3; | |
824 tmp11 = tmp1 + tmp2; | |
825 tmp12 = tmp1 - tmp2; | |
826 } else { | |
827 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ | |
828 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
829 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
830 | |
831 tmp0 = d4 << CONST_BITS; | |
832 | |
833 tmp10 = tmp0 + tmp3; | |
834 tmp13 = tmp0 - tmp3; | |
835 tmp11 = tmp2 - tmp0; | |
836 tmp12 = -(tmp0 + tmp2); | |
837 } | |
838 } else { | |
839 if (d0) { | |
840 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
841 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
842 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
843 } else { | |
844 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ | |
845 tmp10 = tmp13 = d4 << CONST_BITS; | |
846 tmp11 = tmp12 = -tmp10; | |
847 } | |
848 } | |
849 } else { | |
850 if (d2) { | |
851 if (d0) { | |
852 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ | |
853 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
854 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
855 | |
856 tmp0 = d0 << CONST_BITS; | |
857 | |
858 tmp10 = tmp0 + tmp3; | |
859 tmp13 = tmp0 - tmp3; | |
860 tmp11 = tmp0 + tmp2; | |
861 tmp12 = tmp0 - tmp2; | |
862 } else { | |
863 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ | |
864 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
865 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
866 | |
867 tmp10 = tmp3; | |
868 tmp13 = -tmp3; | |
869 tmp11 = tmp2; | |
870 tmp12 = -tmp2; | |
871 } | |
872 } else { | |
873 if (d0) { | |
874 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ | |
875 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; | |
876 } else { | |
877 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ | |
878 tmp10 = tmp13 = tmp11 = tmp12 = 0; | |
879 } | |
880 } | |
881 } | |
882 } | |
883 | |
884 /* Odd part per figure 8; the matrix is unitary and hence its | |
885 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
886 */ | |
887 if (d7) { | |
888 if (d5) { | |
889 if (d3) { | |
890 if (d1) { | |
891 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
892 z1 = d7 + d1; | |
893 z2 = d5 + d3; | |
894 z3 = d7 + d3; | |
895 z4 = d5 + d1; | |
896 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
897 | |
898 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
899 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
900 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
901 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
902 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
903 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
904 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
905 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
906 | |
907 z3 += z5; | |
908 z4 += z5; | |
909 | |
910 tmp0 += z1 + z3; | |
911 tmp1 += z2 + z4; | |
912 tmp2 += z2 + z3; | |
913 tmp3 += z1 + z4; | |
914 } else { | |
915 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
916 z1 = d7; | |
917 z2 = d5 + d3; | |
918 z3 = d7 + d3; | |
919 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
920 | |
921 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
922 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
923 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
924 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
925 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
926 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
927 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
928 | |
929 z3 += z5; | |
930 z4 += z5; | |
931 | |
932 tmp0 += z1 + z3; | |
933 tmp1 += z2 + z4; | |
934 tmp2 += z2 + z3; | |
935 tmp3 = z1 + z4; | |
936 } | |
937 } else { | |
938 if (d1) { | |
939 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
940 z1 = d7 + d1; | |
941 z2 = d5; | |
942 z3 = d7; | |
943 z4 = d5 + d1; | |
944 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
945 | |
946 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
947 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
948 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
949 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
950 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
951 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
952 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
953 | |
954 z3 += z5; | |
955 z4 += z5; | |
956 | |
957 tmp0 += z1 + z3; | |
958 tmp1 += z2 + z4; | |
959 tmp2 = z2 + z3; | |
960 tmp3 += z1 + z4; | |
961 } else { | |
962 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
963 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
964 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
965 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
966 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
967 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
968 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
969 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
970 | |
971 z3 += z5; | |
972 z4 += z5; | |
973 | |
974 tmp0 += z3; | |
975 tmp1 += z4; | |
976 tmp2 = z2 + z3; | |
977 tmp3 = z1 + z4; | |
978 } | |
979 } | |
980 } else { | |
981 if (d3) { | |
982 if (d1) { | |
983 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
984 z1 = d7 + d1; | |
985 z3 = d7 + d3; | |
986 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
987 | |
988 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
989 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
990 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
991 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
992 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
993 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
994 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
995 | |
996 z3 += z5; | |
997 z4 += z5; | |
998 | |
999 tmp0 += z1 + z3; | |
1000 tmp1 = z2 + z4; | |
1001 tmp2 += z2 + z3; | |
1002 tmp3 += z1 + z4; | |
1003 } else { | |
1004 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
1005 z3 = d7 + d3; | |
1006 | |
1007 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
1008 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
1009 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
1010 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
1011 z5 = MULTIPLY(z3, FIX_1_175875602); | |
1012 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
1013 | |
1014 tmp0 += z3; | |
1015 tmp1 = z2 + z5; | |
1016 tmp2 += z3; | |
1017 tmp3 = z1 + z5; | |
1018 } | |
1019 } else { | |
1020 if (d1) { | |
1021 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
1022 z1 = d7 + d1; | |
1023 z5 = MULTIPLY(z1, FIX_1_175875602); | |
1024 | |
1025 z1 = MULTIPLY(z1, FIX_0_275899380); | |
1026 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
1027 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
1028 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
1029 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
1030 | |
1031 tmp0 += z1; | |
1032 tmp1 = z4 + z5; | |
1033 tmp2 = z3 + z5; | |
1034 tmp3 += z1; | |
1035 } else { | |
1036 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
1037 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
1038 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
1039 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
1040 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
1041 } | |
1042 } | |
1043 } | |
1044 } else { | |
1045 if (d5) { | |
1046 if (d3) { | |
1047 if (d1) { | |
1048 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
1049 z2 = d5 + d3; | |
1050 z4 = d5 + d1; | |
1051 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
1052 | |
1053 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
1054 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
1055 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
1056 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
1057 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
1058 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
1059 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
1060 | |
1061 z3 += z5; | |
1062 z4 += z5; | |
1063 | |
1064 tmp0 = z1 + z3; | |
1065 tmp1 += z2 + z4; | |
1066 tmp2 += z2 + z3; | |
1067 tmp3 += z1 + z4; | |
1068 } else { | |
1069 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
1070 z2 = d5 + d3; | |
1071 | |
1072 z5 = MULTIPLY(z2, FIX_1_175875602); | |
1073 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
1074 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
1075 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
1076 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
1077 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
1078 | |
1079 tmp0 = z3 + z5; | |
1080 tmp1 += z2; | |
1081 tmp2 += z2; | |
1082 tmp3 = z4 + z5; | |
1083 } | |
1084 } else { | |
1085 if (d1) { | |
1086 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
1087 z4 = d5 + d1; | |
1088 | |
1089 z5 = MULTIPLY(z4, FIX_1_175875602); | |
1090 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
1091 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
1092 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
1093 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
1094 z4 = MULTIPLY(z4, FIX_0_785694958); | |
1095 | |
1096 tmp0 = z1 + z5; | |
1097 tmp1 += z4; | |
1098 tmp2 = z2 + z5; | |
1099 tmp3 += z4; | |
1100 } else { | |
1101 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
1102 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
1103 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
1104 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
1105 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
1106 } | |
1107 } | |
1108 } else { | |
1109 if (d3) { | |
1110 if (d1) { | |
1111 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
1112 z5 = d1 + d3; | |
1113 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
1114 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
1115 z1 = MULTIPLY(d1, FIX_1_061594337); | |
1116 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
1117 z4 = MULTIPLY(z5, FIX_0_785694958); | |
1118 z5 = MULTIPLY(z5, FIX_1_175875602); | |
1119 | |
1120 tmp0 = z1 - z4; | |
1121 tmp1 = z2 + z4; | |
1122 tmp2 += z5; | |
1123 tmp3 += z5; | |
1124 } else { | |
1125 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
1126 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
1127 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
1128 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
1129 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
1130 } | |
1131 } else { | |
1132 if (d1) { | |
1133 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
1134 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
1135 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
1136 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
1137 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
1138 } else { | |
1139 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
1140 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
1141 } | |
1142 } | |
1143 } | |
1144 } | |
1145 | |
1146 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1147 | |
1148 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, | |
1149 CONST_BITS+PASS1_BITS+3); | |
1150 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, | |
1151 CONST_BITS+PASS1_BITS+3); | |
1152 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, | |
1153 CONST_BITS+PASS1_BITS+3); | |
1154 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, | |
1155 CONST_BITS+PASS1_BITS+3); | |
1156 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, | |
1157 CONST_BITS+PASS1_BITS+3); | |
1158 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, | |
1159 CONST_BITS+PASS1_BITS+3); | |
1160 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, | |
1161 CONST_BITS+PASS1_BITS+3); | |
1162 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, | |
1163 CONST_BITS+PASS1_BITS+3); | |
1164 | |
1165 dataptr++; /* advance pointer to next column */ | |
1166 } | |
1167 } | |
1168 | |
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1169 #undef FIX /* drop FIX at end of file so the name cannot clash with other sources when several files are compiled as one unit */ |
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1170 #undef CONST_BITS /* CONST_BITS is #defined near the top of this file; drop it here so the name cannot clash with other sources when several files are compiled as one unit */ |