0
|
1 /*
|
|
2 * jrevdct.c
|
|
3 *
|
|
4 * Copyright (C) 1991, 1992, Thomas G. Lane.
|
|
5 * This file is part of the Independent JPEG Group's software.
|
|
6 * For conditions of distribution and use, see the accompanying README file.
|
|
7 *
|
|
8 * This file contains the basic inverse-DCT transformation subroutine.
|
|
9 *
|
|
10 * This implementation is based on an algorithm described in
|
|
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
|
|
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
|
|
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
|
|
14 * The primary algorithm described there uses 11 multiplies and 29 adds.
|
|
15 * We use their alternate method with 12 multiplies and 32 adds.
|
|
16 * The advantage of this method is that no data path contains more than one
|
|
17 * multiplication; this allows a very simple and accurate implementation in
|
|
18 * scaled fixed-point arithmetic, with a minimal number of shifts.
|
|
19 *
|
|
20 * I've made lots of modifications to attempt to take advantage of the
|
|
21 * sparse nature of the DCT matrices we're getting. Although the logic
|
|
22 * is cumbersome, it's straightforward and the resulting code is much
|
|
23 * faster.
|
|
24 *
|
|
25 * A better way to do this would be to pass in the DCT block as a sparse
|
|
26 * matrix, perhaps with the different cases encoded.
|
|
27 */
|
|
28 #include "common.h"
|
|
29 #include "dsputil.h"
|
|
30
|
|
31 #define EIGHT_BIT_SAMPLES /* selects PASS1_BITS == 2 and permits the 16-bit MULTIPLY variants further down */
|
|
32
|
|
33 #define DCTSIZE 8 /* edge length of one DCT block; this file refuses to compile for any other value */
|
|
34 #define DCTSIZE2 64 /* number of coefficients in one block (DCTSIZE * DCTSIZE) */
|
|
35
|
|
36 #define GLOBAL /* expands to nothing; not referenced in the part of the file reviewed here */
|
|
37
|
|
38 #define RIGHT_SHIFT(x, n) ((x) >> (n)) /* plain >>; DESCALE assumes this rounds toward minus infinity for negative x, which C leaves implementation-defined */
|
|
39
|
|
40 typedef DCTELEM DCTBLOCK[DCTSIZE2]; /* one block of DCTSIZE2 coefficients; DCTELEM is declared outside this file (presumably in an included header -- confirm) */
|
|
41
|
|
42 #define CONST_BITS 13 /* number of fractional bits in the fixed-point constants (CONST_SCALE == 1 << CONST_BITS) */
|
|
43
|
|
44 /*
|
|
45 * This routine is specialized to the case DCTSIZE = 8.
|
|
46 */
|
|
47
|
|
48 #if DCTSIZE != 8
|
|
49 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
|
|
50 #endif
|
|
51
|
|
52
|
|
53 /*
|
|
54 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
|
|
55 * on each column. Direct algorithms are also available, but they are
|
|
56 * much more complex and seem not to be any faster when reduced to code.
|
|
57 *
|
|
58 * The details of the scaling are as follows:
|
|
59 *
|
|
60 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
|
|
61 * larger than the true IDCT outputs. The final outputs are therefore
|
|
62 * a factor of N larger than desired; since N=8 this can be cured by
|
|
63 * a simple right shift at the end of the algorithm. The advantage of
|
|
64 * this arrangement is that we save two multiplications per 1-D IDCT,
|
|
65 * because the y0 and y4 inputs need not be divided by sqrt(N).
|
|
66 *
|
|
67 * We have to do addition and subtraction of the integer inputs, which
|
|
68 * is no problem, and multiplication by fractional constants, which is
|
|
69 * a problem to do in integer arithmetic. We multiply all the constants
|
|
70 * by CONST_SCALE and convert them to integer constants (thus retaining
|
|
71 * CONST_BITS bits of precision in the constants). After doing a
|
|
72 * multiplication we have to divide the product by CONST_SCALE, with proper
|
|
73 * rounding, to produce the correct output. This division can be done
|
|
74 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
|
|
75 * as long as possible so that partial sums can be added together with
|
|
76 * full fractional precision.
|
|
77 *
|
|
78 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
|
|
79 * they are represented to better-than-integral precision. These outputs
|
|
80 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
|
|
81 * with the recommended scaling. (To scale up 12-bit sample data further, an
|
|
82 * intermediate int32 array would be needed.)
|
|
83 *
|
|
84 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
|
|
85 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
|
|
86 * shows that the values given below are the most effective.
|
|
87 */
|
|
88
|
|
89 #ifdef EIGHT_BIT_SAMPLES
|
|
90 #define PASS1_BITS 2 /* the branch actually compiled: EIGHT_BIT_SAMPLES is defined unconditionally near the top of this file */
|
|
91 #else
|
|
92 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
|
|
93 #endif
|
|
94
|
|
95 #define ONE ((INT32) 1) /* the value 1 cast to INT32, so shifts built from it are evaluated at (at least) INT32 width */
|
|
96
|
|
97 #define CONST_SCALE (ONE << CONST_BITS) /* fixed-point representation of 1.0 */
|
|
98
|
|
99 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
|
|
100 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
|
|
101 * you will pay a significant penalty in run time. In that case, figure
|
|
102 * the correct integer constant values and insert them by hand.
|
|
103 */
|
|
104
|
|
105 /* FIX is no longer used: every constant it would produce is precomputed as a FIX_* literal further down. */
|
|
106 #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))
|
|
107
|
|
108 /* Descale and correctly round an INT32 value that's scaled by N bits.
|
|
109 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
|
|
110 * the fudge factor (2**(N-1), i.e. one half in the scaled domain) is correct for either sign of X. N must be at least 1 and is expanded twice by the macro.
|
|
111 */
|
|
112
|
|
113 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
|
|
114
|
|
115 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
|
|
116 * For 8-bit samples with the recommended scaling, all the variable
|
|
117 * and constant values involved are no more than 16 bits wide, so a
|
|
118 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
|
|
119 * this provides a useful speedup on many machines.
|
|
120 * There is no way to specify a 16x16->32 multiply in portable C, but
|
|
121 * some C compilers will do the right thing if you provide the correct
|
|
122 * combination of casts.
|
|
123 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
|
|
124 */
|
|
125
|
|
126 #ifdef EIGHT_BIT_SAMPLES
|
|
127 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
|
|
128 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) /* both operands are cast to INT16 before the multiply */
|
|
129 #endif
|
|
130 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
|
|
131 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) /* NOTE(review): if SHORTxSHORT_32 is also defined this redefines MULTIPLY with a different body; define at most one of the two switches */
|
|
132 #endif
|
|
133 #endif
|
|
134
|
|
135 #ifndef MULTIPLY /* default definition */
|
|
136 #define MULTIPLY(var,const) ((var) * (const)) /* used when neither switch above is defined; neither is defined in the part of the file reviewed here */
|
|
137 #endif
|
|
138
|
|
139
|
|
140 /*
|
|
141 Unlike our decoder, where we approximate the FIX constants, we need the exact
|
|
142 values here; otherwise successive P-frames will drift too much with reference-frame coding. Each FIX_a_bcdefghij below is the real value a.bcdefghij multiplied by 2**CONST_BITS (8192) and rounded to the nearest integer, i.e. what FIX(a.bcdefghij) would yield.
|
|
143 */
|
|
144 #define FIX_0_211164243 1730
|
|
145 #define FIX_0_275899380 2260
|
|
146 #define FIX_0_298631336 2446
|
|
147 #define FIX_0_390180644 3196
|
|
148 #define FIX_0_509795579 4176
|
|
149 #define FIX_0_541196100 4433
|
|
150 #define FIX_0_601344887 4926
|
|
151 #define FIX_0_765366865 6270
|
|
152 #define FIX_0_785694958 6436
|
|
153 #define FIX_0_899976223 7373
|
|
154 #define FIX_1_061594337 8697
|
|
155 #define FIX_1_111140466 9102
|
|
156 #define FIX_1_175875602 9633
|
|
157 #define FIX_1_306562965 10703
|
|
158 #define FIX_1_387039845 11363
|
|
159 #define FIX_1_451774981 11893
|
|
160 #define FIX_1_501321110 12299
|
|
161 #define FIX_1_662939225 13623
|
|
162 #define FIX_1_847759065 15137
|
|
163 #define FIX_1_961570560 16069
|
|
164 #define FIX_2_053119869 16819
|
|
165 #define FIX_2_172734803 17799
|
|
166 #define FIX_2_562915447 20995
|
|
167 #define FIX_3_072711026 25172
|
|
168
|
|
169 /*
|
|
170 * Perform the inverse DCT on one block of coefficients.
|
|
171 */
|
|
172
|
|
173 void j_rev_dct(DCTBLOCK data)
|
|
174 {
|
|
175 INT32 tmp0, tmp1, tmp2, tmp3;
|
|
176 INT32 tmp10, tmp11, tmp12, tmp13;
|
|
177 INT32 z1, z2, z3, z4, z5;
|
|
178 INT32 d0, d1, d2, d3, d4, d5, d6, d7;
|
|
179 register DCTELEM *dataptr;
|
|
180 int rowctr;
|
|
181
|
|
182 /* Pass 1: process rows. */
|
|
183 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
|
|
184 /* furthermore, we scale the results by 2**PASS1_BITS. */
|
|
185
|
|
186 dataptr = data;
|
|
187
|
|
188 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
|
|
189 /* Due to quantization, we will usually find that many of the input
|
|
190 * coefficients are zero, especially the AC terms. We can exploit this
|
|
191 * by short-circuiting the IDCT calculation for any row in which all
|
|
192 * the AC terms are zero. In that case each output is equal to the
|
|
193 * DC coefficient (with scale factor as needed).
|
|
194 * With typical images and quantization tables, half or more of the
|
|
195 * row DCT calculations can be simplified this way.
|
|
196 */
|
|
197
|
|
198 register int *idataptr = (int*)dataptr;
|
|
199
|
|
200 d0 = dataptr[0];
|
|
201 d1 = dataptr[1];
|
|
202 d2 = dataptr[2];
|
|
203 d3 = dataptr[3];
|
|
204 d4 = dataptr[4];
|
|
205 d5 = dataptr[5];
|
|
206 d6 = dataptr[6];
|
|
207 d7 = dataptr[7];
|
|
208
|
|
209 if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
|
|
210 /* AC terms all zero */
|
|
211 if (d0) {
|
|
212 /* Compute a 32 bit value to assign. */
|
|
213 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
|
|
214 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
|
|
215
|
|
216 idataptr[0] = v;
|
|
217 idataptr[1] = v;
|
|
218 idataptr[2] = v;
|
|
219 idataptr[3] = v;
|
|
220 }
|
|
221
|
|
222 dataptr += DCTSIZE; /* advance pointer to next row */
|
|
223 continue;
|
|
224 }
|
|
225
|
|
226 /* Even part: reverse the even part of the forward DCT. */
|
|
227 /* The rotator is sqrt(2)*c(-6). */
|
|
228 {
|
|
229 if (d6) {
|
|
230 if (d4) {
|
|
231 if (d2) {
|
|
232 if (d0) {
|
|
233 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
|
|
234 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
235 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
236 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
237
|
|
238 tmp0 = (d0 + d4) << CONST_BITS;
|
|
239 tmp1 = (d0 - d4) << CONST_BITS;
|
|
240
|
|
241 tmp10 = tmp0 + tmp3;
|
|
242 tmp13 = tmp0 - tmp3;
|
|
243 tmp11 = tmp1 + tmp2;
|
|
244 tmp12 = tmp1 - tmp2;
|
|
245 } else {
|
|
246 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
|
|
247 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
248 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
249 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
250
|
|
251 tmp0 = d4 << CONST_BITS;
|
|
252
|
|
253 tmp10 = tmp0 + tmp3;
|
|
254 tmp13 = tmp0 - tmp3;
|
|
255 tmp11 = tmp2 - tmp0;
|
|
256 tmp12 = -(tmp0 + tmp2);
|
|
257 }
|
|
258 } else {
|
|
259 if (d0) {
|
|
260 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
|
|
261 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
262 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
263
|
|
264 tmp0 = (d0 + d4) << CONST_BITS;
|
|
265 tmp1 = (d0 - d4) << CONST_BITS;
|
|
266
|
|
267 tmp10 = tmp0 + tmp3;
|
|
268 tmp13 = tmp0 - tmp3;
|
|
269 tmp11 = tmp1 + tmp2;
|
|
270 tmp12 = tmp1 - tmp2;
|
|
271 } else {
|
|
272 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
|
|
273 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
274 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
275
|
|
276 tmp0 = d4 << CONST_BITS;
|
|
277
|
|
278 tmp10 = tmp0 + tmp3;
|
|
279 tmp13 = tmp0 - tmp3;
|
|
280 tmp11 = tmp2 - tmp0;
|
|
281 tmp12 = -(tmp0 + tmp2);
|
|
282 }
|
|
283 }
|
|
284 } else {
|
|
285 if (d2) {
|
|
286 if (d0) {
|
|
287 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
|
|
288 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
289 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
290 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
291
|
|
292 tmp0 = d0 << CONST_BITS;
|
|
293
|
|
294 tmp10 = tmp0 + tmp3;
|
|
295 tmp13 = tmp0 - tmp3;
|
|
296 tmp11 = tmp0 + tmp2;
|
|
297 tmp12 = tmp0 - tmp2;
|
|
298 } else {
|
|
299 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
|
|
300 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
301 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
302 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
303
|
|
304 tmp10 = tmp3;
|
|
305 tmp13 = -tmp3;
|
|
306 tmp11 = tmp2;
|
|
307 tmp12 = -tmp2;
|
|
308 }
|
|
309 } else {
|
|
310 if (d0) {
|
|
311 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
|
|
312 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
313 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
314
|
|
315 tmp0 = d0 << CONST_BITS;
|
|
316
|
|
317 tmp10 = tmp0 + tmp3;
|
|
318 tmp13 = tmp0 - tmp3;
|
|
319 tmp11 = tmp0 + tmp2;
|
|
320 tmp12 = tmp0 - tmp2;
|
|
321 } else {
|
|
322 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
|
|
323 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
324 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
325
|
|
326 tmp10 = tmp3;
|
|
327 tmp13 = -tmp3;
|
|
328 tmp11 = tmp2;
|
|
329 tmp12 = -tmp2;
|
|
330 }
|
|
331 }
|
|
332 }
|
|
333 } else {
|
|
334 if (d4) {
|
|
335 if (d2) {
|
|
336 if (d0) {
|
|
337 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
|
|
338 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
339 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
340
|
|
341 tmp0 = (d0 + d4) << CONST_BITS;
|
|
342 tmp1 = (d0 - d4) << CONST_BITS;
|
|
343
|
|
344 tmp10 = tmp0 + tmp3;
|
|
345 tmp13 = tmp0 - tmp3;
|
|
346 tmp11 = tmp1 + tmp2;
|
|
347 tmp12 = tmp1 - tmp2;
|
|
348 } else {
|
|
349 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
|
|
350 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
351 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
352
|
|
353 tmp0 = d4 << CONST_BITS;
|
|
354
|
|
355 tmp10 = tmp0 + tmp3;
|
|
356 tmp13 = tmp0 - tmp3;
|
|
357 tmp11 = tmp2 - tmp0;
|
|
358 tmp12 = -(tmp0 + tmp2);
|
|
359 }
|
|
360 } else {
|
|
361 if (d0) {
|
|
362 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
|
|
363 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
|
|
364 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
|
|
365 } else {
|
|
366 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
|
|
367 tmp10 = tmp13 = d4 << CONST_BITS;
|
|
368 tmp11 = tmp12 = -tmp10;
|
|
369 }
|
|
370 }
|
|
371 } else {
|
|
372 if (d2) {
|
|
373 if (d0) {
|
|
374 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
|
|
375 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
376 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
377
|
|
378 tmp0 = d0 << CONST_BITS;
|
|
379
|
|
380 tmp10 = tmp0 + tmp3;
|
|
381 tmp13 = tmp0 - tmp3;
|
|
382 tmp11 = tmp0 + tmp2;
|
|
383 tmp12 = tmp0 - tmp2;
|
|
384 } else {
|
|
385 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
|
|
386 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
387 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
388
|
|
389 tmp10 = tmp3;
|
|
390 tmp13 = -tmp3;
|
|
391 tmp11 = tmp2;
|
|
392 tmp12 = -tmp2;
|
|
393 }
|
|
394 } else {
|
|
395 if (d0) {
|
|
396 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
|
|
397 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
|
|
398 } else {
|
|
399 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
|
|
400 tmp10 = tmp13 = tmp11 = tmp12 = 0;
|
|
401 }
|
|
402 }
|
|
403 }
|
|
404 }
|
|
405
|
|
406 /* Odd part per figure 8; the matrix is unitary and hence its
|
|
407 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
|
|
408 */
|
|
409
|
|
410 if (d7) {
|
|
411 if (d5) {
|
|
412 if (d3) {
|
|
413 if (d1) {
|
|
414 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
|
|
415 z1 = d7 + d1;
|
|
416 z2 = d5 + d3;
|
|
417 z3 = d7 + d3;
|
|
418 z4 = d5 + d1;
|
|
419 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
|
|
420
|
|
421 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
422 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
423 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
424 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
425 z1 = MULTIPLY(-z1, FIX_0_899976223);
|
|
426 z2 = MULTIPLY(-z2, FIX_2_562915447);
|
|
427 z3 = MULTIPLY(-z3, FIX_1_961570560);
|
|
428 z4 = MULTIPLY(-z4, FIX_0_390180644);
|
|
429
|
|
430 z3 += z5;
|
|
431 z4 += z5;
|
|
432
|
|
433 tmp0 += z1 + z3;
|
|
434 tmp1 += z2 + z4;
|
|
435 tmp2 += z2 + z3;
|
|
436 tmp3 += z1 + z4;
|
|
437 } else {
|
|
438 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
|
|
439 z2 = d5 + d3;
|
|
440 z3 = d7 + d3;
|
|
441 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
|
|
442
|
|
443 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
444 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
445 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
446 z1 = MULTIPLY(-d7, FIX_0_899976223);
|
|
447 z2 = MULTIPLY(-z2, FIX_2_562915447);
|
|
448 z3 = MULTIPLY(-z3, FIX_1_961570560);
|
|
449 z4 = MULTIPLY(-d5, FIX_0_390180644);
|
|
450
|
|
451 z3 += z5;
|
|
452 z4 += z5;
|
|
453
|
|
454 tmp0 += z1 + z3;
|
|
455 tmp1 += z2 + z4;
|
|
456 tmp2 += z2 + z3;
|
|
457 tmp3 = z1 + z4;
|
|
458 }
|
|
459 } else {
|
|
460 if (d1) {
|
|
461 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
|
|
462 z1 = d7 + d1;
|
|
463 z4 = d5 + d1;
|
|
464 z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
|
|
465
|
|
466 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
467 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
468 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
469 z1 = MULTIPLY(-z1, FIX_0_899976223);
|
|
470 z2 = MULTIPLY(-d5, FIX_2_562915447);
|
|
471 z3 = MULTIPLY(-d7, FIX_1_961570560);
|
|
472 z4 = MULTIPLY(-z4, FIX_0_390180644);
|
|
473
|
|
474 z3 += z5;
|
|
475 z4 += z5;
|
|
476
|
|
477 tmp0 += z1 + z3;
|
|
478 tmp1 += z2 + z4;
|
|
479 tmp2 = z2 + z3;
|
|
480 tmp3 += z1 + z4;
|
|
481 } else {
|
|
482 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
|
|
483 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
|
484 z1 = MULTIPLY(-d7, FIX_0_899976223);
|
|
485 z3 = MULTIPLY(-d7, FIX_1_961570560);
|
|
486 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
|
487 z2 = MULTIPLY(-d5, FIX_2_562915447);
|
|
488 z4 = MULTIPLY(-d5, FIX_0_390180644);
|
|
489 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
|
|
490
|
|
491 z3 += z5;
|
|
492 z4 += z5;
|
|
493
|
|
494 tmp0 += z3;
|
|
495 tmp1 += z4;
|
|
496 tmp2 = z2 + z3;
|
|
497 tmp3 = z1 + z4;
|
|
498 }
|
|
499 }
|
|
500 } else {
|
|
501 if (d3) {
|
|
502 if (d1) {
|
|
503 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
|
|
504 z1 = d7 + d1;
|
|
505 z3 = d7 + d3;
|
|
506 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
|
|
507
|
|
508 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
509 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
510 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
511 z1 = MULTIPLY(-z1, FIX_0_899976223);
|
|
512 z2 = MULTIPLY(-d3, FIX_2_562915447);
|
|
513 z3 = MULTIPLY(-z3, FIX_1_961570560);
|
|
514 z4 = MULTIPLY(-d1, FIX_0_390180644);
|
|
515
|
|
516 z3 += z5;
|
|
517 z4 += z5;
|
|
518
|
|
519 tmp0 += z1 + z3;
|
|
520 tmp1 = z2 + z4;
|
|
521 tmp2 += z2 + z3;
|
|
522 tmp3 += z1 + z4;
|
|
523 } else {
|
|
524 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
|
|
525 z3 = d7 + d3;
|
|
526
|
|
527 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
|
528 z1 = MULTIPLY(-d7, FIX_0_899976223);
|
|
529 tmp2 = MULTIPLY(d3, FIX_0_509795579);
|
|
530 z2 = MULTIPLY(-d3, FIX_2_562915447);
|
|
531 z5 = MULTIPLY(z3, FIX_1_175875602);
|
|
532 z3 = MULTIPLY(-z3, FIX_0_785694958);
|
|
533
|
|
534 tmp0 += z3;
|
|
535 tmp1 = z2 + z5;
|
|
536 tmp2 += z3;
|
|
537 tmp3 = z1 + z5;
|
|
538 }
|
|
539 } else {
|
|
540 if (d1) {
|
|
541 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
|
|
542 z1 = d7 + d1;
|
|
543 z5 = MULTIPLY(z1, FIX_1_175875602);
|
|
544
|
|
545 z1 = MULTIPLY(z1, FIX_0_275899380);
|
|
546 z3 = MULTIPLY(-d7, FIX_1_961570560);
|
|
547 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
|
|
548 z4 = MULTIPLY(-d1, FIX_0_390180644);
|
|
549 tmp3 = MULTIPLY(d1, FIX_1_111140466);
|
|
550
|
|
551 tmp0 += z1;
|
|
552 tmp1 = z4 + z5;
|
|
553 tmp2 = z3 + z5;
|
|
554 tmp3 += z1;
|
|
555 } else {
|
|
556 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
|
|
557 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
|
|
558 tmp1 = MULTIPLY(d7, FIX_1_175875602);
|
|
559 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
|
|
560 tmp3 = MULTIPLY(d7, FIX_0_275899380);
|
|
561 }
|
|
562 }
|
|
563 }
|
|
564 } else {
|
|
565 if (d5) {
|
|
566 if (d3) {
|
|
567 if (d1) {
|
|
568 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
|
|
569 z2 = d5 + d3;
|
|
570 z4 = d5 + d1;
|
|
571 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
|
|
572
|
|
573 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
574 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
575 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
576 z1 = MULTIPLY(-d1, FIX_0_899976223);
|
|
577 z2 = MULTIPLY(-z2, FIX_2_562915447);
|
|
578 z3 = MULTIPLY(-d3, FIX_1_961570560);
|
|
579 z4 = MULTIPLY(-z4, FIX_0_390180644);
|
|
580
|
|
581 z3 += z5;
|
|
582 z4 += z5;
|
|
583
|
|
584 tmp0 = z1 + z3;
|
|
585 tmp1 += z2 + z4;
|
|
586 tmp2 += z2 + z3;
|
|
587 tmp3 += z1 + z4;
|
|
588 } else {
|
|
589 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
|
|
590 z2 = d5 + d3;
|
|
591
|
|
592 z5 = MULTIPLY(z2, FIX_1_175875602);
|
|
593 tmp1 = MULTIPLY(d5, FIX_1_662939225);
|
|
594 z4 = MULTIPLY(-d5, FIX_0_390180644);
|
|
595 z2 = MULTIPLY(-z2, FIX_1_387039845);
|
|
596 tmp2 = MULTIPLY(d3, FIX_1_111140466);
|
|
597 z3 = MULTIPLY(-d3, FIX_1_961570560);
|
|
598
|
|
599 tmp0 = z3 + z5;
|
|
600 tmp1 += z2;
|
|
601 tmp2 += z2;
|
|
602 tmp3 = z4 + z5;
|
|
603 }
|
|
604 } else {
|
|
605 if (d1) {
|
|
606 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
|
|
607 z4 = d5 + d1;
|
|
608
|
|
609 z5 = MULTIPLY(z4, FIX_1_175875602);
|
|
610 z1 = MULTIPLY(-d1, FIX_0_899976223);
|
|
611 tmp3 = MULTIPLY(d1, FIX_0_601344887);
|
|
612 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
|
613 z2 = MULTIPLY(-d5, FIX_2_562915447);
|
|
614 z4 = MULTIPLY(z4, FIX_0_785694958);
|
|
615
|
|
616 tmp0 = z1 + z5;
|
|
617 tmp1 += z4;
|
|
618 tmp2 = z2 + z5;
|
|
619 tmp3 += z4;
|
|
620 } else {
|
|
621 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
|
|
622 tmp0 = MULTIPLY(d5, FIX_1_175875602);
|
|
623 tmp1 = MULTIPLY(d5, FIX_0_275899380);
|
|
624 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
|
|
625 tmp3 = MULTIPLY(d5, FIX_0_785694958);
|
|
626 }
|
|
627 }
|
|
628 } else {
|
|
629 if (d3) {
|
|
630 if (d1) {
|
|
631 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
|
|
632 z5 = d1 + d3;
|
|
633 tmp3 = MULTIPLY(d1, FIX_0_211164243);
|
|
634 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
|
|
635 z1 = MULTIPLY(d1, FIX_1_061594337);
|
|
636 z2 = MULTIPLY(-d3, FIX_2_172734803);
|
|
637 z4 = MULTIPLY(z5, FIX_0_785694958);
|
|
638 z5 = MULTIPLY(z5, FIX_1_175875602);
|
|
639
|
|
640 tmp0 = z1 - z4;
|
|
641 tmp1 = z2 + z4;
|
|
642 tmp2 += z5;
|
|
643 tmp3 += z5;
|
|
644 } else {
|
|
645 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
|
|
646 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
|
|
647 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
|
|
648 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
|
|
649 tmp3 = MULTIPLY(d3, FIX_1_175875602);
|
|
650 }
|
|
651 } else {
|
|
652 if (d1) {
|
|
653 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
|
|
654 tmp0 = MULTIPLY(d1, FIX_0_275899380);
|
|
655 tmp1 = MULTIPLY(d1, FIX_0_785694958);
|
|
656 tmp2 = MULTIPLY(d1, FIX_1_175875602);
|
|
657 tmp3 = MULTIPLY(d1, FIX_1_387039845);
|
|
658 } else {
|
|
659 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
|
|
660 tmp0 = tmp1 = tmp2 = tmp3 = 0;
|
|
661 }
|
|
662 }
|
|
663 }
|
|
664 }
|
|
665 }
|
|
666 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
|
667
|
|
668 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
|
|
669 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
|
|
670 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
|
|
671 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
|
|
672 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
|
|
673 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
|
|
674 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
|
|
675 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
|
|
676
|
|
677 dataptr += DCTSIZE; /* advance pointer to next row */
|
|
678 }
|
|
679
|
|
680 /* Pass 2: process columns. */
|
|
681 /* Note that we must descale the results by a factor of 8 == 2**3, */
|
|
682 /* and also undo the PASS1_BITS scaling. */
|
|
683
|
|
684 dataptr = data;
|
|
685 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
|
|
686 /* Columns of zeroes can be exploited in the same way as we did with rows.
|
|
687 * However, the row calculation has created many nonzero AC terms, so the
|
|
688 * simplification applies less often (typically 5% to 10% of the time).
|
|
689 * On machines with very fast multiplication, it's possible that the
|
|
690 * test takes more time than it's worth. In that case this section
|
|
691 * may be commented out.
|
|
692 */
|
|
693
|
|
694 d0 = dataptr[DCTSIZE*0];
|
|
695 d1 = dataptr[DCTSIZE*1];
|
|
696 d2 = dataptr[DCTSIZE*2];
|
|
697 d3 = dataptr[DCTSIZE*3];
|
|
698 d4 = dataptr[DCTSIZE*4];
|
|
699 d5 = dataptr[DCTSIZE*5];
|
|
700 d6 = dataptr[DCTSIZE*6];
|
|
701 d7 = dataptr[DCTSIZE*7];
|
|
702
|
|
703 /* Even part: reverse the even part of the forward DCT. */
|
|
704 /* The rotator is sqrt(2)*c(-6). */
|
|
705 if (d6) {
|
|
706 if (d4) {
|
|
707 if (d2) {
|
|
708 if (d0) {
|
|
709 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
|
|
710 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
711 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
712 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
713
|
|
714 tmp0 = (d0 + d4) << CONST_BITS;
|
|
715 tmp1 = (d0 - d4) << CONST_BITS;
|
|
716
|
|
717 tmp10 = tmp0 + tmp3;
|
|
718 tmp13 = tmp0 - tmp3;
|
|
719 tmp11 = tmp1 + tmp2;
|
|
720 tmp12 = tmp1 - tmp2;
|
|
721 } else {
|
|
722 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
|
|
723 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
724 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
725 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
726
|
|
727 tmp0 = d4 << CONST_BITS;
|
|
728
|
|
729 tmp10 = tmp0 + tmp3;
|
|
730 tmp13 = tmp0 - tmp3;
|
|
731 tmp11 = tmp2 - tmp0;
|
|
732 tmp12 = -(tmp0 + tmp2);
|
|
733 }
|
|
734 } else {
|
|
735 if (d0) {
|
|
736 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
|
|
737 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
738 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
739
|
|
740 tmp0 = (d0 + d4) << CONST_BITS;
|
|
741 tmp1 = (d0 - d4) << CONST_BITS;
|
|
742
|
|
743 tmp10 = tmp0 + tmp3;
|
|
744 tmp13 = tmp0 - tmp3;
|
|
745 tmp11 = tmp1 + tmp2;
|
|
746 tmp12 = tmp1 - tmp2;
|
|
747 } else {
|
|
748 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
|
|
749 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
750 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
751
|
|
752 tmp0 = d4 << CONST_BITS;
|
|
753
|
|
754 tmp10 = tmp0 + tmp3;
|
|
755 tmp13 = tmp0 - tmp3;
|
|
756 tmp11 = tmp2 - tmp0;
|
|
757 tmp12 = -(tmp0 + tmp2);
|
|
758 }
|
|
759 }
|
|
760 } else {
|
|
761 if (d2) {
|
|
762 if (d0) {
|
|
763 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
|
|
764 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
765 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
766 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
767
|
|
768 tmp0 = d0 << CONST_BITS;
|
|
769
|
|
770 tmp10 = tmp0 + tmp3;
|
|
771 tmp13 = tmp0 - tmp3;
|
|
772 tmp11 = tmp0 + tmp2;
|
|
773 tmp12 = tmp0 - tmp2;
|
|
774 } else {
|
|
775 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
|
|
776 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
|
|
777 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
|
|
778 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
|
|
779
|
|
780 tmp10 = tmp3;
|
|
781 tmp13 = -tmp3;
|
|
782 tmp11 = tmp2;
|
|
783 tmp12 = -tmp2;
|
|
784 }
|
|
785 } else {
|
|
786 if (d0) {
|
|
787 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
|
|
788 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
789 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
790
|
|
791 tmp0 = d0 << CONST_BITS;
|
|
792
|
|
793 tmp10 = tmp0 + tmp3;
|
|
794 tmp13 = tmp0 - tmp3;
|
|
795 tmp11 = tmp0 + tmp2;
|
|
796 tmp12 = tmp0 - tmp2;
|
|
797 } else {
|
|
798 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
|
|
799 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
|
|
800 tmp3 = MULTIPLY(d6, FIX_0_541196100);
|
|
801
|
|
802 tmp10 = tmp3;
|
|
803 tmp13 = -tmp3;
|
|
804 tmp11 = tmp2;
|
|
805 tmp12 = -tmp2;
|
|
806 }
|
|
807 }
|
|
808 }
|
|
809 } else {
|
|
810 if (d4) {
|
|
811 if (d2) {
|
|
812 if (d0) {
|
|
813 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
|
|
814 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
815 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
816
|
|
817 tmp0 = (d0 + d4) << CONST_BITS;
|
|
818 tmp1 = (d0 - d4) << CONST_BITS;
|
|
819
|
|
820 tmp10 = tmp0 + tmp3;
|
|
821 tmp13 = tmp0 - tmp3;
|
|
822 tmp11 = tmp1 + tmp2;
|
|
823 tmp12 = tmp1 - tmp2;
|
|
824 } else {
|
|
825 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
|
|
826 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
827 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
828
|
|
829 tmp0 = d4 << CONST_BITS;
|
|
830
|
|
831 tmp10 = tmp0 + tmp3;
|
|
832 tmp13 = tmp0 - tmp3;
|
|
833 tmp11 = tmp2 - tmp0;
|
|
834 tmp12 = -(tmp0 + tmp2);
|
|
835 }
|
|
836 } else {
|
|
837 if (d0) {
|
|
838 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
|
|
839 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
|
|
840 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
|
|
841 } else {
|
|
842 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
|
|
843 tmp10 = tmp13 = d4 << CONST_BITS;
|
|
844 tmp11 = tmp12 = -tmp10;
|
|
845 }
|
|
846 }
|
|
847 } else {
|
|
848 if (d2) {
|
|
849 if (d0) {
|
|
850 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
|
|
851 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
852 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
853
|
|
854 tmp0 = d0 << CONST_BITS;
|
|
855
|
|
856 tmp10 = tmp0 + tmp3;
|
|
857 tmp13 = tmp0 - tmp3;
|
|
858 tmp11 = tmp0 + tmp2;
|
|
859 tmp12 = tmp0 - tmp2;
|
|
860 } else {
|
|
861 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
|
|
862 tmp2 = MULTIPLY(d2, FIX_0_541196100);
|
|
863 tmp3 = MULTIPLY(d2, FIX_1_306562965);
|
|
864
|
|
865 tmp10 = tmp3;
|
|
866 tmp13 = -tmp3;
|
|
867 tmp11 = tmp2;
|
|
868 tmp12 = -tmp2;
|
|
869 }
|
|
870 } else {
|
|
871 if (d0) {
|
|
872 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
|
|
873 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
|
|
874 } else {
|
|
875 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
|
|
876 tmp10 = tmp13 = tmp11 = tmp12 = 0;
|
|
877 }
|
|
878 }
|
|
879 }
|
|
880 }
|
|
881
|
|
882 /* Odd part per figure 8; the matrix is unitary and hence its
|
|
883 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
|
|
884 */
|
|
885 if (d7) {
|
|
886 if (d5) {
|
|
887 if (d3) {
|
|
888 if (d1) {
|
|
889 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
|
|
890 z1 = d7 + d1;
|
|
891 z2 = d5 + d3;
|
|
892 z3 = d7 + d3;
|
|
893 z4 = d5 + d1;
|
|
894 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
|
|
895
|
|
896 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
897 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
898 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
899 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
900 z1 = MULTIPLY(-z1, FIX_0_899976223);
|
|
901 z2 = MULTIPLY(-z2, FIX_2_562915447);
|
|
902 z3 = MULTIPLY(-z3, FIX_1_961570560);
|
|
903 z4 = MULTIPLY(-z4, FIX_0_390180644);
|
|
904
|
|
905 z3 += z5;
|
|
906 z4 += z5;
|
|
907
|
|
908 tmp0 += z1 + z3;
|
|
909 tmp1 += z2 + z4;
|
|
910 tmp2 += z2 + z3;
|
|
911 tmp3 += z1 + z4;
|
|
912 } else {
|
|
913 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
|
|
914 z1 = d7;
|
|
915 z2 = d5 + d3;
|
|
916 z3 = d7 + d3;
|
|
917 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
|
|
918
|
|
919 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
920 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
921 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
922 z1 = MULTIPLY(-d7, FIX_0_899976223);
|
|
923 z2 = MULTIPLY(-z2, FIX_2_562915447);
|
|
924 z3 = MULTIPLY(-z3, FIX_1_961570560);
|
|
925 z4 = MULTIPLY(-d5, FIX_0_390180644);
|
|
926
|
|
927 z3 += z5;
|
|
928 z4 += z5;
|
|
929
|
|
930 tmp0 += z1 + z3;
|
|
931 tmp1 += z2 + z4;
|
|
932 tmp2 += z2 + z3;
|
|
933 tmp3 = z1 + z4;
|
|
934 }
|
|
935 } else {
|
|
936 if (d1) {
|
|
937 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
|
|
938 z1 = d7 + d1;
|
|
939 z2 = d5;
|
|
940 z3 = d7;
|
|
941 z4 = d5 + d1;
|
|
942 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
|
|
943
|
|
944 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
945 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
946 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
947 z1 = MULTIPLY(-z1, FIX_0_899976223);
|
|
948 z2 = MULTIPLY(-d5, FIX_2_562915447);
|
|
949 z3 = MULTIPLY(-d7, FIX_1_961570560);
|
|
950 z4 = MULTIPLY(-z4, FIX_0_390180644);
|
|
951
|
|
952 z3 += z5;
|
|
953 z4 += z5;
|
|
954
|
|
955 tmp0 += z1 + z3;
|
|
956 tmp1 += z2 + z4;
|
|
957 tmp2 = z2 + z3;
|
|
958 tmp3 += z1 + z4;
|
|
959 } else {
|
|
960 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
|
|
961 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
|
962 z1 = MULTIPLY(-d7, FIX_0_899976223);
|
|
963 z3 = MULTIPLY(-d7, FIX_1_961570560);
|
|
964 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
|
965 z2 = MULTIPLY(-d5, FIX_2_562915447);
|
|
966 z4 = MULTIPLY(-d5, FIX_0_390180644);
|
|
967 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
|
|
968
|
|
969 z3 += z5;
|
|
970 z4 += z5;
|
|
971
|
|
972 tmp0 += z3;
|
|
973 tmp1 += z4;
|
|
974 tmp2 = z2 + z3;
|
|
975 tmp3 = z1 + z4;
|
|
976 }
|
|
977 }
|
|
978 } else {
|
|
979 if (d3) {
|
|
980 if (d1) {
|
|
981 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
|
|
982 z1 = d7 + d1;
|
|
983 z3 = d7 + d3;
|
|
984 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
|
|
985
|
|
986 tmp0 = MULTIPLY(d7, FIX_0_298631336);
|
|
987 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
988 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
989 z1 = MULTIPLY(-z1, FIX_0_899976223);
|
|
990 z2 = MULTIPLY(-d3, FIX_2_562915447);
|
|
991 z3 = MULTIPLY(-z3, FIX_1_961570560);
|
|
992 z4 = MULTIPLY(-d1, FIX_0_390180644);
|
|
993
|
|
994 z3 += z5;
|
|
995 z4 += z5;
|
|
996
|
|
997 tmp0 += z1 + z3;
|
|
998 tmp1 = z2 + z4;
|
|
999 tmp2 += z2 + z3;
|
|
1000 tmp3 += z1 + z4;
|
|
1001 } else {
|
|
1002 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
|
|
1003 z3 = d7 + d3;
|
|
1004
|
|
1005 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
|
|
1006 z1 = MULTIPLY(-d7, FIX_0_899976223);
|
|
1007 tmp2 = MULTIPLY(d3, FIX_0_509795579);
|
|
1008 z2 = MULTIPLY(-d3, FIX_2_562915447);
|
|
1009 z5 = MULTIPLY(z3, FIX_1_175875602);
|
|
1010 z3 = MULTIPLY(-z3, FIX_0_785694958);
|
|
1011
|
|
1012 tmp0 += z3;
|
|
1013 tmp1 = z2 + z5;
|
|
1014 tmp2 += z3;
|
|
1015 tmp3 = z1 + z5;
|
|
1016 }
|
|
1017 } else {
|
|
1018 if (d1) {
|
|
1019 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
|
|
1020 z1 = d7 + d1;
|
|
1021 z5 = MULTIPLY(z1, FIX_1_175875602);
|
|
1022
|
|
1023 z1 = MULTIPLY(z1, FIX_0_275899380);
|
|
1024 z3 = MULTIPLY(-d7, FIX_1_961570560);
|
|
1025 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
|
|
1026 z4 = MULTIPLY(-d1, FIX_0_390180644);
|
|
1027 tmp3 = MULTIPLY(d1, FIX_1_111140466);
|
|
1028
|
|
1029 tmp0 += z1;
|
|
1030 tmp1 = z4 + z5;
|
|
1031 tmp2 = z3 + z5;
|
|
1032 tmp3 += z1;
|
|
1033 } else {
|
|
1034 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
|
|
1035 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
|
|
1036 tmp1 = MULTIPLY(d7, FIX_1_175875602);
|
|
1037 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
|
|
1038 tmp3 = MULTIPLY(d7, FIX_0_275899380);
|
|
1039 }
|
|
1040 }
|
|
1041 }
|
|
1042 } else {
|
|
1043 if (d5) {
|
|
1044 if (d3) {
|
|
1045 if (d1) {
|
|
1046 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
|
|
1047 z2 = d5 + d3;
|
|
1048 z4 = d5 + d1;
|
|
1049 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
|
|
1050
|
|
1051 tmp1 = MULTIPLY(d5, FIX_2_053119869);
|
|
1052 tmp2 = MULTIPLY(d3, FIX_3_072711026);
|
|
1053 tmp3 = MULTIPLY(d1, FIX_1_501321110);
|
|
1054 z1 = MULTIPLY(-d1, FIX_0_899976223);
|
|
1055 z2 = MULTIPLY(-z2, FIX_2_562915447);
|
|
1056 z3 = MULTIPLY(-d3, FIX_1_961570560);
|
|
1057 z4 = MULTIPLY(-z4, FIX_0_390180644);
|
|
1058
|
|
1059 z3 += z5;
|
|
1060 z4 += z5;
|
|
1061
|
|
1062 tmp0 = z1 + z3;
|
|
1063 tmp1 += z2 + z4;
|
|
1064 tmp2 += z2 + z3;
|
|
1065 tmp3 += z1 + z4;
|
|
1066 } else {
|
|
1067 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
|
|
1068 z2 = d5 + d3;
|
|
1069
|
|
1070 z5 = MULTIPLY(z2, FIX_1_175875602);
|
|
1071 tmp1 = MULTIPLY(d5, FIX_1_662939225);
|
|
1072 z4 = MULTIPLY(-d5, FIX_0_390180644);
|
|
1073 z2 = MULTIPLY(-z2, FIX_1_387039845);
|
|
1074 tmp2 = MULTIPLY(d3, FIX_1_111140466);
|
|
1075 z3 = MULTIPLY(-d3, FIX_1_961570560);
|
|
1076
|
|
1077 tmp0 = z3 + z5;
|
|
1078 tmp1 += z2;
|
|
1079 tmp2 += z2;
|
|
1080 tmp3 = z4 + z5;
|
|
1081 }
|
|
1082 } else {
|
|
1083 if (d1) {
|
|
1084 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
|
|
1085 z4 = d5 + d1;
|
|
1086
|
|
1087 z5 = MULTIPLY(z4, FIX_1_175875602);
|
|
1088 z1 = MULTIPLY(-d1, FIX_0_899976223);
|
|
1089 tmp3 = MULTIPLY(d1, FIX_0_601344887);
|
|
1090 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
|
|
1091 z2 = MULTIPLY(-d5, FIX_2_562915447);
|
|
1092 z4 = MULTIPLY(z4, FIX_0_785694958);
|
|
1093
|
|
1094 tmp0 = z1 + z5;
|
|
1095 tmp1 += z4;
|
|
1096 tmp2 = z2 + z5;
|
|
1097 tmp3 += z4;
|
|
1098 } else {
|
|
1099 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
|
|
1100 tmp0 = MULTIPLY(d5, FIX_1_175875602);
|
|
1101 tmp1 = MULTIPLY(d5, FIX_0_275899380);
|
|
1102 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
|
|
1103 tmp3 = MULTIPLY(d5, FIX_0_785694958);
|
|
1104 }
|
|
1105 }
|
|
1106 } else {
|
|
1107 if (d3) {
|
|
1108 if (d1) {
|
|
1109 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
|
|
1110 z5 = d1 + d3;
|
|
1111 tmp3 = MULTIPLY(d1, FIX_0_211164243);
|
|
1112 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
|
|
1113 z1 = MULTIPLY(d1, FIX_1_061594337);
|
|
1114 z2 = MULTIPLY(-d3, FIX_2_172734803);
|
|
1115 z4 = MULTIPLY(z5, FIX_0_785694958);
|
|
1116 z5 = MULTIPLY(z5, FIX_1_175875602);
|
|
1117
|
|
1118 tmp0 = z1 - z4;
|
|
1119 tmp1 = z2 + z4;
|
|
1120 tmp2 += z5;
|
|
1121 tmp3 += z5;
|
|
1122 } else {
|
|
1123 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
|
|
1124 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
|
|
1125 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
|
|
1126 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
|
|
1127 tmp3 = MULTIPLY(d3, FIX_1_175875602);
|
|
1128 }
|
|
1129 } else {
|
|
1130 if (d1) {
|
|
1131 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
|
|
1132 tmp0 = MULTIPLY(d1, FIX_0_275899380);
|
|
1133 tmp1 = MULTIPLY(d1, FIX_0_785694958);
|
|
1134 tmp2 = MULTIPLY(d1, FIX_1_175875602);
|
|
1135 tmp3 = MULTIPLY(d1, FIX_1_387039845);
|
|
1136 } else {
|
|
1137 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
|
|
1138 tmp0 = tmp1 = tmp2 = tmp3 = 0;
|
|
1139 }
|
|
1140 }
|
|
1141 }
|
|
1142 }
|
|
1143
|
|
1144 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
|
1145
|
|
1146 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
|
|
1147 CONST_BITS+PASS1_BITS+3);
|
|
1148 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
|
|
1149 CONST_BITS+PASS1_BITS+3);
|
|
1150 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
|
|
1151 CONST_BITS+PASS1_BITS+3);
|
|
1152 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
|
|
1153 CONST_BITS+PASS1_BITS+3);
|
|
1154 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
|
|
1155 CONST_BITS+PASS1_BITS+3);
|
|
1156 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
|
|
1157 CONST_BITS+PASS1_BITS+3);
|
|
1158 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
|
|
1159 CONST_BITS+PASS1_BITS+3);
|
|
1160 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
|
|
1161 CONST_BITS+PASS1_BITS+3);
|
|
1162
|
|
1163 dataptr++; /* advance pointer to next column */
|
|
1164 }
|
|
1165 }
|
|
1166
|
|
1167
|