comparison jrevdct.c @ 0:986e461dc072 libavcodec

Initial revision
author glantau
date Sun, 22 Jul 2001 14:18:56 +0000
parents
children 23723a0ebd24
comparison
equal deleted inserted replaced
-1:000000000000 0:986e461dc072
1 /*
2 * jrevdct.c
3 *
4 * Copyright (C) 1991, 1992, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains the basic inverse-DCT transformation subroutine.
9 *
10 * This implementation is based on an algorithm described in
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
14 * The primary algorithm described there uses 11 multiplies and 29 adds.
15 * We use their alternate method with 12 multiplies and 32 adds.
16 * The advantage of this method is that no data path contains more than one
17 * multiplication; this allows a very simple and accurate implementation in
18 * scaled fixed-point arithmetic, with a minimal number of shifts.
19 *
20 * I've made lots of modifications to attempt to take advantage of the
21 * sparse nature of the DCT matrices we're getting. Although the logic
22 * is cumbersome, it's straightforward and the resulting code is much
23 * faster.
24 *
25 * A better way to do this would be to pass in the DCT block as a sparse
26 * matrix, perhaps with the difference cases encoded.
27 */
28 #include "common.h"
29 #include "dsputil.h"
30
31 #define EIGHT_BIT_SAMPLES
32
33 #define DCTSIZE 8
34 #define DCTSIZE2 64
35
36 #define GLOBAL
37
38 #define RIGHT_SHIFT(x, n) ((x) >> (n))
39
40 typedef DCTELEM DCTBLOCK[DCTSIZE2];
41
42 #define CONST_BITS 13
43
44 /*
45 * This routine is specialized to the case DCTSIZE = 8.
46 */
47
48 #if DCTSIZE != 8
49 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
50 #endif
51
52
53 /*
54 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
55 * on each column. Direct algorithms are also available, but they are
56 * much more complex and seem not to be any faster when reduced to code.
57 *
58 * The poop on this scaling stuff is as follows:
59 *
60 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
61 * larger than the true IDCT outputs. The final outputs are therefore
62 * a factor of N larger than desired; since N=8 this can be cured by
63 * a simple right shift at the end of the algorithm. The advantage of
64 * this arrangement is that we save two multiplications per 1-D IDCT,
65 * because the y0 and y4 inputs need not be divided by sqrt(N).
66 *
67 * We have to do addition and subtraction of the integer inputs, which
68 * is no problem, and multiplication by fractional constants, which is
69 * a problem to do in integer arithmetic. We multiply all the constants
70 * by CONST_SCALE and convert them to integer constants (thus retaining
71 * CONST_BITS bits of precision in the constants). After doing a
72 * multiplication we have to divide the product by CONST_SCALE, with proper
73 * rounding, to produce the correct output. This division can be done
74 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
75 * as long as possible so that partial sums can be added together with
76 * full fractional precision.
77 *
78 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
79 * they are represented to better-than-integral precision. These outputs
80 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
81 * with the recommended scaling. (To scale up 12-bit sample data further, an
82 * intermediate int32 array would be needed.)
83 *
84 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
85 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
86 * shows that the values given below are the most effective.
87 */
88
/* Extra fractional bits kept in the pass-1 (row) outputs. */
#ifdef EIGHT_BIT_SAMPLES
#define PASS1_BITS 2
#else
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif

#define ONE ((INT32) 1)

/* Scale factor applied to the real-valued constants (2**CONST_BITS). */
#define CONST_SCALE (ONE << CONST_BITS)

/* Convert a positive real constant to an integer scaled by CONST_SCALE.
 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
 * you will pay a significant penalty in run time.  In that case, figure
 * the correct integer constant values and insert them by hand.
 *
 * FIX is no longer used by this file: every constant is precomputed in
 * the FIX_* table.
 */
#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))

/* Descale and correctly round an INT32 value that's scaled by N bits.
 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 * the fudge factor (half of 2**n) is correct for either sign of X.
 */
#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)

/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
 * this provides a useful speedup on many machines.
 * There is no way to specify a 16x16->32 multiply in portable C, but
 * some C compilers will do the right thing if you provide the correct
 * combination of casts.
 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The two special-case forms are mutually exclusive: if both selector
 * macros are defined, SHORTxSHORT_32 wins instead of triggering a
 * conflicting redefinition.
 */
#ifdef EIGHT_BIT_SAMPLES
#if defined(SHORTxSHORT_32)      /* may work if 'int' is 32 bits */
#define MULTIPLY(var,coef) (((INT16) (var)) * ((INT16) (coef)))
#elif defined(SHORTxLCONST_32)   /* known to work with Microsoft C 6.0 */
#define MULTIPLY(var,coef) (((INT16) (var)) * ((INT32) (coef)))
#endif
#endif

#ifndef MULTIPLY /* default definition */
#define MULTIPLY(var,coef) ((var) * (coef))
#endif
138
139
/*
 * Fixed-point constants, each equal to the real value in its name times
 * 2**13 (CONST_SCALE), rounded to the nearest integer.
 *
 * Unlike our decoder, where the FIXes are approximated, exact values are
 * required here: otherwise successive P-frames drift too much when
 * reference frame coding is used.
 */
#define FIX_0_211164243   1730
#define FIX_0_275899380   2260
#define FIX_0_298631336   2446
#define FIX_0_390180644   3196
#define FIX_0_509795579   4176
#define FIX_0_541196100   4433
#define FIX_0_601344887   4926
#define FIX_0_765366865   6270
#define FIX_0_785694958   6436
#define FIX_0_899976223   7373
#define FIX_1_061594337   8697
#define FIX_1_111140466   9102
#define FIX_1_175875602   9633
#define FIX_1_306562965  10703
#define FIX_1_387039845  11363
#define FIX_1_451774981  11893
#define FIX_1_501321110  12299
#define FIX_1_662939225  13623
#define FIX_1_847759065  15137
#define FIX_1_961570560  16069
#define FIX_2_053119869  16819
#define FIX_2_172734803  17799
#define FIX_2_562915447  20995
#define FIX_3_072711026  25172
168
169 /*
170 * Perform the inverse DCT on one block of coefficients.
171 */
172
173 void j_rev_dct(DCTBLOCK data)
174 {
175 INT32 tmp0, tmp1, tmp2, tmp3;
176 INT32 tmp10, tmp11, tmp12, tmp13;
177 INT32 z1, z2, z3, z4, z5;
178 INT32 d0, d1, d2, d3, d4, d5, d6, d7;
179 register DCTELEM *dataptr;
180 int rowctr;
181
182 /* Pass 1: process rows. */
183 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
184 /* furthermore, we scale the results by 2**PASS1_BITS. */
185
186 dataptr = data;
187
188 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
189 /* Due to quantization, we will usually find that many of the input
190 * coefficients are zero, especially the AC terms. We can exploit this
191 * by short-circuiting the IDCT calculation for any row in which all
192 * the AC terms are zero. In that case each output is equal to the
193 * DC coefficient (with scale factor as needed).
194 * With typical images and quantization tables, half or more of the
195 * row DCT calculations can be simplified this way.
196 */
197
198 register int *idataptr = (int*)dataptr;
199
200 d0 = dataptr[0];
201 d1 = dataptr[1];
202 d2 = dataptr[2];
203 d3 = dataptr[3];
204 d4 = dataptr[4];
205 d5 = dataptr[5];
206 d6 = dataptr[6];
207 d7 = dataptr[7];
208
209 if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
210 /* AC terms all zero */
211 if (d0) {
212 /* Compute a 32 bit value to assign. */
213 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
214 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
215
216 idataptr[0] = v;
217 idataptr[1] = v;
218 idataptr[2] = v;
219 idataptr[3] = v;
220 }
221
222 dataptr += DCTSIZE; /* advance pointer to next row */
223 continue;
224 }
225
226 /* Even part: reverse the even part of the forward DCT. */
227 /* The rotator is sqrt(2)*c(-6). */
228 {
229 if (d6) {
230 if (d4) {
231 if (d2) {
232 if (d0) {
233 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
234 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
235 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
236 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
237
238 tmp0 = (d0 + d4) << CONST_BITS;
239 tmp1 = (d0 - d4) << CONST_BITS;
240
241 tmp10 = tmp0 + tmp3;
242 tmp13 = tmp0 - tmp3;
243 tmp11 = tmp1 + tmp2;
244 tmp12 = tmp1 - tmp2;
245 } else {
246 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
247 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
248 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
249 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
250
251 tmp0 = d4 << CONST_BITS;
252
253 tmp10 = tmp0 + tmp3;
254 tmp13 = tmp0 - tmp3;
255 tmp11 = tmp2 - tmp0;
256 tmp12 = -(tmp0 + tmp2);
257 }
258 } else {
259 if (d0) {
260 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
261 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
262 tmp3 = MULTIPLY(d6, FIX_0_541196100);
263
264 tmp0 = (d0 + d4) << CONST_BITS;
265 tmp1 = (d0 - d4) << CONST_BITS;
266
267 tmp10 = tmp0 + tmp3;
268 tmp13 = tmp0 - tmp3;
269 tmp11 = tmp1 + tmp2;
270 tmp12 = tmp1 - tmp2;
271 } else {
272 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
273 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
274 tmp3 = MULTIPLY(d6, FIX_0_541196100);
275
276 tmp0 = d4 << CONST_BITS;
277
278 tmp10 = tmp0 + tmp3;
279 tmp13 = tmp0 - tmp3;
280 tmp11 = tmp2 - tmp0;
281 tmp12 = -(tmp0 + tmp2);
282 }
283 }
284 } else {
285 if (d2) {
286 if (d0) {
287 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
288 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
289 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
290 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
291
292 tmp0 = d0 << CONST_BITS;
293
294 tmp10 = tmp0 + tmp3;
295 tmp13 = tmp0 - tmp3;
296 tmp11 = tmp0 + tmp2;
297 tmp12 = tmp0 - tmp2;
298 } else {
299 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
300 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
301 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
302 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
303
304 tmp10 = tmp3;
305 tmp13 = -tmp3;
306 tmp11 = tmp2;
307 tmp12 = -tmp2;
308 }
309 } else {
310 if (d0) {
311 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
312 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
313 tmp3 = MULTIPLY(d6, FIX_0_541196100);
314
315 tmp0 = d0 << CONST_BITS;
316
317 tmp10 = tmp0 + tmp3;
318 tmp13 = tmp0 - tmp3;
319 tmp11 = tmp0 + tmp2;
320 tmp12 = tmp0 - tmp2;
321 } else {
322 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
323 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
324 tmp3 = MULTIPLY(d6, FIX_0_541196100);
325
326 tmp10 = tmp3;
327 tmp13 = -tmp3;
328 tmp11 = tmp2;
329 tmp12 = -tmp2;
330 }
331 }
332 }
333 } else {
334 if (d4) {
335 if (d2) {
336 if (d0) {
337 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
338 tmp2 = MULTIPLY(d2, FIX_0_541196100);
339 tmp3 = MULTIPLY(d2, FIX_1_306562965);
340
341 tmp0 = (d0 + d4) << CONST_BITS;
342 tmp1 = (d0 - d4) << CONST_BITS;
343
344 tmp10 = tmp0 + tmp3;
345 tmp13 = tmp0 - tmp3;
346 tmp11 = tmp1 + tmp2;
347 tmp12 = tmp1 - tmp2;
348 } else {
349 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
350 tmp2 = MULTIPLY(d2, FIX_0_541196100);
351 tmp3 = MULTIPLY(d2, FIX_1_306562965);
352
353 tmp0 = d4 << CONST_BITS;
354
355 tmp10 = tmp0 + tmp3;
356 tmp13 = tmp0 - tmp3;
357 tmp11 = tmp2 - tmp0;
358 tmp12 = -(tmp0 + tmp2);
359 }
360 } else {
361 if (d0) {
362 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
363 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
364 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
365 } else {
366 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
367 tmp10 = tmp13 = d4 << CONST_BITS;
368 tmp11 = tmp12 = -tmp10;
369 }
370 }
371 } else {
372 if (d2) {
373 if (d0) {
374 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
375 tmp2 = MULTIPLY(d2, FIX_0_541196100);
376 tmp3 = MULTIPLY(d2, FIX_1_306562965);
377
378 tmp0 = d0 << CONST_BITS;
379
380 tmp10 = tmp0 + tmp3;
381 tmp13 = tmp0 - tmp3;
382 tmp11 = tmp0 + tmp2;
383 tmp12 = tmp0 - tmp2;
384 } else {
385 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
386 tmp2 = MULTIPLY(d2, FIX_0_541196100);
387 tmp3 = MULTIPLY(d2, FIX_1_306562965);
388
389 tmp10 = tmp3;
390 tmp13 = -tmp3;
391 tmp11 = tmp2;
392 tmp12 = -tmp2;
393 }
394 } else {
395 if (d0) {
396 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
397 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
398 } else {
399 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
400 tmp10 = tmp13 = tmp11 = tmp12 = 0;
401 }
402 }
403 }
404 }
405
406 /* Odd part per figure 8; the matrix is unitary and hence its
407 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
408 */
409
410 if (d7) {
411 if (d5) {
412 if (d3) {
413 if (d1) {
414 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
415 z1 = d7 + d1;
416 z2 = d5 + d3;
417 z3 = d7 + d3;
418 z4 = d5 + d1;
419 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
420
421 tmp0 = MULTIPLY(d7, FIX_0_298631336);
422 tmp1 = MULTIPLY(d5, FIX_2_053119869);
423 tmp2 = MULTIPLY(d3, FIX_3_072711026);
424 tmp3 = MULTIPLY(d1, FIX_1_501321110);
425 z1 = MULTIPLY(-z1, FIX_0_899976223);
426 z2 = MULTIPLY(-z2, FIX_2_562915447);
427 z3 = MULTIPLY(-z3, FIX_1_961570560);
428 z4 = MULTIPLY(-z4, FIX_0_390180644);
429
430 z3 += z5;
431 z4 += z5;
432
433 tmp0 += z1 + z3;
434 tmp1 += z2 + z4;
435 tmp2 += z2 + z3;
436 tmp3 += z1 + z4;
437 } else {
438 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
439 z2 = d5 + d3;
440 z3 = d7 + d3;
441 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
442
443 tmp0 = MULTIPLY(d7, FIX_0_298631336);
444 tmp1 = MULTIPLY(d5, FIX_2_053119869);
445 tmp2 = MULTIPLY(d3, FIX_3_072711026);
446 z1 = MULTIPLY(-d7, FIX_0_899976223);
447 z2 = MULTIPLY(-z2, FIX_2_562915447);
448 z3 = MULTIPLY(-z3, FIX_1_961570560);
449 z4 = MULTIPLY(-d5, FIX_0_390180644);
450
451 z3 += z5;
452 z4 += z5;
453
454 tmp0 += z1 + z3;
455 tmp1 += z2 + z4;
456 tmp2 += z2 + z3;
457 tmp3 = z1 + z4;
458 }
459 } else {
460 if (d1) {
461 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
462 z1 = d7 + d1;
463 z4 = d5 + d1;
464 z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
465
466 tmp0 = MULTIPLY(d7, FIX_0_298631336);
467 tmp1 = MULTIPLY(d5, FIX_2_053119869);
468 tmp3 = MULTIPLY(d1, FIX_1_501321110);
469 z1 = MULTIPLY(-z1, FIX_0_899976223);
470 z2 = MULTIPLY(-d5, FIX_2_562915447);
471 z3 = MULTIPLY(-d7, FIX_1_961570560);
472 z4 = MULTIPLY(-z4, FIX_0_390180644);
473
474 z3 += z5;
475 z4 += z5;
476
477 tmp0 += z1 + z3;
478 tmp1 += z2 + z4;
479 tmp2 = z2 + z3;
480 tmp3 += z1 + z4;
481 } else {
482 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
483 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
484 z1 = MULTIPLY(-d7, FIX_0_899976223);
485 z3 = MULTIPLY(-d7, FIX_1_961570560);
486 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
487 z2 = MULTIPLY(-d5, FIX_2_562915447);
488 z4 = MULTIPLY(-d5, FIX_0_390180644);
489 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
490
491 z3 += z5;
492 z4 += z5;
493
494 tmp0 += z3;
495 tmp1 += z4;
496 tmp2 = z2 + z3;
497 tmp3 = z1 + z4;
498 }
499 }
500 } else {
501 if (d3) {
502 if (d1) {
503 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
504 z1 = d7 + d1;
505 z3 = d7 + d3;
506 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
507
508 tmp0 = MULTIPLY(d7, FIX_0_298631336);
509 tmp2 = MULTIPLY(d3, FIX_3_072711026);
510 tmp3 = MULTIPLY(d1, FIX_1_501321110);
511 z1 = MULTIPLY(-z1, FIX_0_899976223);
512 z2 = MULTIPLY(-d3, FIX_2_562915447);
513 z3 = MULTIPLY(-z3, FIX_1_961570560);
514 z4 = MULTIPLY(-d1, FIX_0_390180644);
515
516 z3 += z5;
517 z4 += z5;
518
519 tmp0 += z1 + z3;
520 tmp1 = z2 + z4;
521 tmp2 += z2 + z3;
522 tmp3 += z1 + z4;
523 } else {
524 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
525 z3 = d7 + d3;
526
527 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
528 z1 = MULTIPLY(-d7, FIX_0_899976223);
529 tmp2 = MULTIPLY(d3, FIX_0_509795579);
530 z2 = MULTIPLY(-d3, FIX_2_562915447);
531 z5 = MULTIPLY(z3, FIX_1_175875602);
532 z3 = MULTIPLY(-z3, FIX_0_785694958);
533
534 tmp0 += z3;
535 tmp1 = z2 + z5;
536 tmp2 += z3;
537 tmp3 = z1 + z5;
538 }
539 } else {
540 if (d1) {
541 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
542 z1 = d7 + d1;
543 z5 = MULTIPLY(z1, FIX_1_175875602);
544
545 z1 = MULTIPLY(z1, FIX_0_275899380);
546 z3 = MULTIPLY(-d7, FIX_1_961570560);
547 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
548 z4 = MULTIPLY(-d1, FIX_0_390180644);
549 tmp3 = MULTIPLY(d1, FIX_1_111140466);
550
551 tmp0 += z1;
552 tmp1 = z4 + z5;
553 tmp2 = z3 + z5;
554 tmp3 += z1;
555 } else {
556 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
557 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
558 tmp1 = MULTIPLY(d7, FIX_1_175875602);
559 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
560 tmp3 = MULTIPLY(d7, FIX_0_275899380);
561 }
562 }
563 }
564 } else {
565 if (d5) {
566 if (d3) {
567 if (d1) {
568 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
569 z2 = d5 + d3;
570 z4 = d5 + d1;
571 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
572
573 tmp1 = MULTIPLY(d5, FIX_2_053119869);
574 tmp2 = MULTIPLY(d3, FIX_3_072711026);
575 tmp3 = MULTIPLY(d1, FIX_1_501321110);
576 z1 = MULTIPLY(-d1, FIX_0_899976223);
577 z2 = MULTIPLY(-z2, FIX_2_562915447);
578 z3 = MULTIPLY(-d3, FIX_1_961570560);
579 z4 = MULTIPLY(-z4, FIX_0_390180644);
580
581 z3 += z5;
582 z4 += z5;
583
584 tmp0 = z1 + z3;
585 tmp1 += z2 + z4;
586 tmp2 += z2 + z3;
587 tmp3 += z1 + z4;
588 } else {
589 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
590 z2 = d5 + d3;
591
592 z5 = MULTIPLY(z2, FIX_1_175875602);
593 tmp1 = MULTIPLY(d5, FIX_1_662939225);
594 z4 = MULTIPLY(-d5, FIX_0_390180644);
595 z2 = MULTIPLY(-z2, FIX_1_387039845);
596 tmp2 = MULTIPLY(d3, FIX_1_111140466);
597 z3 = MULTIPLY(-d3, FIX_1_961570560);
598
599 tmp0 = z3 + z5;
600 tmp1 += z2;
601 tmp2 += z2;
602 tmp3 = z4 + z5;
603 }
604 } else {
605 if (d1) {
606 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
607 z4 = d5 + d1;
608
609 z5 = MULTIPLY(z4, FIX_1_175875602);
610 z1 = MULTIPLY(-d1, FIX_0_899976223);
611 tmp3 = MULTIPLY(d1, FIX_0_601344887);
612 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
613 z2 = MULTIPLY(-d5, FIX_2_562915447);
614 z4 = MULTIPLY(z4, FIX_0_785694958);
615
616 tmp0 = z1 + z5;
617 tmp1 += z4;
618 tmp2 = z2 + z5;
619 tmp3 += z4;
620 } else {
621 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
622 tmp0 = MULTIPLY(d5, FIX_1_175875602);
623 tmp1 = MULTIPLY(d5, FIX_0_275899380);
624 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
625 tmp3 = MULTIPLY(d5, FIX_0_785694958);
626 }
627 }
628 } else {
629 if (d3) {
630 if (d1) {
631 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
632 z5 = d1 + d3;
633 tmp3 = MULTIPLY(d1, FIX_0_211164243);
634 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
635 z1 = MULTIPLY(d1, FIX_1_061594337);
636 z2 = MULTIPLY(-d3, FIX_2_172734803);
637 z4 = MULTIPLY(z5, FIX_0_785694958);
638 z5 = MULTIPLY(z5, FIX_1_175875602);
639
640 tmp0 = z1 - z4;
641 tmp1 = z2 + z4;
642 tmp2 += z5;
643 tmp3 += z5;
644 } else {
645 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
646 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
647 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
648 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
649 tmp3 = MULTIPLY(d3, FIX_1_175875602);
650 }
651 } else {
652 if (d1) {
653 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
654 tmp0 = MULTIPLY(d1, FIX_0_275899380);
655 tmp1 = MULTIPLY(d1, FIX_0_785694958);
656 tmp2 = MULTIPLY(d1, FIX_1_175875602);
657 tmp3 = MULTIPLY(d1, FIX_1_387039845);
658 } else {
659 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
660 tmp0 = tmp1 = tmp2 = tmp3 = 0;
661 }
662 }
663 }
664 }
665 }
666 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
667
668 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
669 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
670 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
671 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
672 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
673 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
674 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
675 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
676
677 dataptr += DCTSIZE; /* advance pointer to next row */
678 }
679
680 /* Pass 2: process columns. */
681 /* Note that we must descale the results by a factor of 8 == 2**3, */
682 /* and also undo the PASS1_BITS scaling. */
683
684 dataptr = data;
685 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
686 /* Columns of zeroes can be exploited in the same way as we did with rows.
687 * However, the row calculation has created many nonzero AC terms, so the
688 * simplification applies less often (typically 5% to 10% of the time).
689 * On machines with very fast multiplication, it's possible that the
690 * test takes more time than it's worth. In that case this section
691 * may be commented out.
692 */
693
694 d0 = dataptr[DCTSIZE*0];
695 d1 = dataptr[DCTSIZE*1];
696 d2 = dataptr[DCTSIZE*2];
697 d3 = dataptr[DCTSIZE*3];
698 d4 = dataptr[DCTSIZE*4];
699 d5 = dataptr[DCTSIZE*5];
700 d6 = dataptr[DCTSIZE*6];
701 d7 = dataptr[DCTSIZE*7];
702
703 /* Even part: reverse the even part of the forward DCT. */
704 /* The rotator is sqrt(2)*c(-6). */
705 if (d6) {
706 if (d4) {
707 if (d2) {
708 if (d0) {
709 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
710 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
711 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
712 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
713
714 tmp0 = (d0 + d4) << CONST_BITS;
715 tmp1 = (d0 - d4) << CONST_BITS;
716
717 tmp10 = tmp0 + tmp3;
718 tmp13 = tmp0 - tmp3;
719 tmp11 = tmp1 + tmp2;
720 tmp12 = tmp1 - tmp2;
721 } else {
722 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
723 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
724 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
725 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
726
727 tmp0 = d4 << CONST_BITS;
728
729 tmp10 = tmp0 + tmp3;
730 tmp13 = tmp0 - tmp3;
731 tmp11 = tmp2 - tmp0;
732 tmp12 = -(tmp0 + tmp2);
733 }
734 } else {
735 if (d0) {
736 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
737 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
738 tmp3 = MULTIPLY(d6, FIX_0_541196100);
739
740 tmp0 = (d0 + d4) << CONST_BITS;
741 tmp1 = (d0 - d4) << CONST_BITS;
742
743 tmp10 = tmp0 + tmp3;
744 tmp13 = tmp0 - tmp3;
745 tmp11 = tmp1 + tmp2;
746 tmp12 = tmp1 - tmp2;
747 } else {
748 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
749 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
750 tmp3 = MULTIPLY(d6, FIX_0_541196100);
751
752 tmp0 = d4 << CONST_BITS;
753
754 tmp10 = tmp0 + tmp3;
755 tmp13 = tmp0 - tmp3;
756 tmp11 = tmp2 - tmp0;
757 tmp12 = -(tmp0 + tmp2);
758 }
759 }
760 } else {
761 if (d2) {
762 if (d0) {
763 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
764 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
765 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
766 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
767
768 tmp0 = d0 << CONST_BITS;
769
770 tmp10 = tmp0 + tmp3;
771 tmp13 = tmp0 - tmp3;
772 tmp11 = tmp0 + tmp2;
773 tmp12 = tmp0 - tmp2;
774 } else {
775 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
776 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
777 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
778 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
779
780 tmp10 = tmp3;
781 tmp13 = -tmp3;
782 tmp11 = tmp2;
783 tmp12 = -tmp2;
784 }
785 } else {
786 if (d0) {
787 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
788 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
789 tmp3 = MULTIPLY(d6, FIX_0_541196100);
790
791 tmp0 = d0 << CONST_BITS;
792
793 tmp10 = tmp0 + tmp3;
794 tmp13 = tmp0 - tmp3;
795 tmp11 = tmp0 + tmp2;
796 tmp12 = tmp0 - tmp2;
797 } else {
798 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
799 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
800 tmp3 = MULTIPLY(d6, FIX_0_541196100);
801
802 tmp10 = tmp3;
803 tmp13 = -tmp3;
804 tmp11 = tmp2;
805 tmp12 = -tmp2;
806 }
807 }
808 }
809 } else {
810 if (d4) {
811 if (d2) {
812 if (d0) {
813 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
814 tmp2 = MULTIPLY(d2, FIX_0_541196100);
815 tmp3 = MULTIPLY(d2, FIX_1_306562965);
816
817 tmp0 = (d0 + d4) << CONST_BITS;
818 tmp1 = (d0 - d4) << CONST_BITS;
819
820 tmp10 = tmp0 + tmp3;
821 tmp13 = tmp0 - tmp3;
822 tmp11 = tmp1 + tmp2;
823 tmp12 = tmp1 - tmp2;
824 } else {
825 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
826 tmp2 = MULTIPLY(d2, FIX_0_541196100);
827 tmp3 = MULTIPLY(d2, FIX_1_306562965);
828
829 tmp0 = d4 << CONST_BITS;
830
831 tmp10 = tmp0 + tmp3;
832 tmp13 = tmp0 - tmp3;
833 tmp11 = tmp2 - tmp0;
834 tmp12 = -(tmp0 + tmp2);
835 }
836 } else {
837 if (d0) {
838 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
839 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
840 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
841 } else {
842 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
843 tmp10 = tmp13 = d4 << CONST_BITS;
844 tmp11 = tmp12 = -tmp10;
845 }
846 }
847 } else {
848 if (d2) {
849 if (d0) {
850 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
851 tmp2 = MULTIPLY(d2, FIX_0_541196100);
852 tmp3 = MULTIPLY(d2, FIX_1_306562965);
853
854 tmp0 = d0 << CONST_BITS;
855
856 tmp10 = tmp0 + tmp3;
857 tmp13 = tmp0 - tmp3;
858 tmp11 = tmp0 + tmp2;
859 tmp12 = tmp0 - tmp2;
860 } else {
861 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
862 tmp2 = MULTIPLY(d2, FIX_0_541196100);
863 tmp3 = MULTIPLY(d2, FIX_1_306562965);
864
865 tmp10 = tmp3;
866 tmp13 = -tmp3;
867 tmp11 = tmp2;
868 tmp12 = -tmp2;
869 }
870 } else {
871 if (d0) {
872 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
873 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
874 } else {
875 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
876 tmp10 = tmp13 = tmp11 = tmp12 = 0;
877 }
878 }
879 }
880 }
881
882 /* Odd part per figure 8; the matrix is unitary and hence its
883 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
884 */
885 if (d7) {
886 if (d5) {
887 if (d3) {
888 if (d1) {
889 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
890 z1 = d7 + d1;
891 z2 = d5 + d3;
892 z3 = d7 + d3;
893 z4 = d5 + d1;
894 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
895
896 tmp0 = MULTIPLY(d7, FIX_0_298631336);
897 tmp1 = MULTIPLY(d5, FIX_2_053119869);
898 tmp2 = MULTIPLY(d3, FIX_3_072711026);
899 tmp3 = MULTIPLY(d1, FIX_1_501321110);
900 z1 = MULTIPLY(-z1, FIX_0_899976223);
901 z2 = MULTIPLY(-z2, FIX_2_562915447);
902 z3 = MULTIPLY(-z3, FIX_1_961570560);
903 z4 = MULTIPLY(-z4, FIX_0_390180644);
904
905 z3 += z5;
906 z4 += z5;
907
908 tmp0 += z1 + z3;
909 tmp1 += z2 + z4;
910 tmp2 += z2 + z3;
911 tmp3 += z1 + z4;
912 } else {
913 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
914 z1 = d7;
915 z2 = d5 + d3;
916 z3 = d7 + d3;
917 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
918
919 tmp0 = MULTIPLY(d7, FIX_0_298631336);
920 tmp1 = MULTIPLY(d5, FIX_2_053119869);
921 tmp2 = MULTIPLY(d3, FIX_3_072711026);
922 z1 = MULTIPLY(-d7, FIX_0_899976223);
923 z2 = MULTIPLY(-z2, FIX_2_562915447);
924 z3 = MULTIPLY(-z3, FIX_1_961570560);
925 z4 = MULTIPLY(-d5, FIX_0_390180644);
926
927 z3 += z5;
928 z4 += z5;
929
930 tmp0 += z1 + z3;
931 tmp1 += z2 + z4;
932 tmp2 += z2 + z3;
933 tmp3 = z1 + z4;
934 }
935 } else {
936 if (d1) {
937 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
938 z1 = d7 + d1;
939 z2 = d5;
940 z3 = d7;
941 z4 = d5 + d1;
942 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
943
944 tmp0 = MULTIPLY(d7, FIX_0_298631336);
945 tmp1 = MULTIPLY(d5, FIX_2_053119869);
946 tmp3 = MULTIPLY(d1, FIX_1_501321110);
947 z1 = MULTIPLY(-z1, FIX_0_899976223);
948 z2 = MULTIPLY(-d5, FIX_2_562915447);
949 z3 = MULTIPLY(-d7, FIX_1_961570560);
950 z4 = MULTIPLY(-z4, FIX_0_390180644);
951
952 z3 += z5;
953 z4 += z5;
954
955 tmp0 += z1 + z3;
956 tmp1 += z2 + z4;
957 tmp2 = z2 + z3;
958 tmp3 += z1 + z4;
959 } else {
960 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
961 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
962 z1 = MULTIPLY(-d7, FIX_0_899976223);
963 z3 = MULTIPLY(-d7, FIX_1_961570560);
964 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
965 z2 = MULTIPLY(-d5, FIX_2_562915447);
966 z4 = MULTIPLY(-d5, FIX_0_390180644);
967 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
968
969 z3 += z5;
970 z4 += z5;
971
972 tmp0 += z3;
973 tmp1 += z4;
974 tmp2 = z2 + z3;
975 tmp3 = z1 + z4;
976 }
977 }
978 } else {
979 if (d3) {
980 if (d1) {
981 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
982 z1 = d7 + d1;
983 z3 = d7 + d3;
984 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
985
986 tmp0 = MULTIPLY(d7, FIX_0_298631336);
987 tmp2 = MULTIPLY(d3, FIX_3_072711026);
988 tmp3 = MULTIPLY(d1, FIX_1_501321110);
989 z1 = MULTIPLY(-z1, FIX_0_899976223);
990 z2 = MULTIPLY(-d3, FIX_2_562915447);
991 z3 = MULTIPLY(-z3, FIX_1_961570560);
992 z4 = MULTIPLY(-d1, FIX_0_390180644);
993
994 z3 += z5;
995 z4 += z5;
996
997 tmp0 += z1 + z3;
998 tmp1 = z2 + z4;
999 tmp2 += z2 + z3;
1000 tmp3 += z1 + z4;
1001 } else {
1002 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1003 z3 = d7 + d3;
1004
1005 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
1006 z1 = MULTIPLY(-d7, FIX_0_899976223);
1007 tmp2 = MULTIPLY(d3, FIX_0_509795579);
1008 z2 = MULTIPLY(-d3, FIX_2_562915447);
1009 z5 = MULTIPLY(z3, FIX_1_175875602);
1010 z3 = MULTIPLY(-z3, FIX_0_785694958);
1011
1012 tmp0 += z3;
1013 tmp1 = z2 + z5;
1014 tmp2 += z3;
1015 tmp3 = z1 + z5;
1016 }
1017 } else {
1018 if (d1) {
1019 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1020 z1 = d7 + d1;
1021 z5 = MULTIPLY(z1, FIX_1_175875602);
1022
1023 z1 = MULTIPLY(z1, FIX_0_275899380);
1024 z3 = MULTIPLY(-d7, FIX_1_961570560);
1025 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
1026 z4 = MULTIPLY(-d1, FIX_0_390180644);
1027 tmp3 = MULTIPLY(d1, FIX_1_111140466);
1028
1029 tmp0 += z1;
1030 tmp1 = z4 + z5;
1031 tmp2 = z3 + z5;
1032 tmp3 += z1;
1033 } else {
1034 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1035 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
1036 tmp1 = MULTIPLY(d7, FIX_1_175875602);
1037 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
1038 tmp3 = MULTIPLY(d7, FIX_0_275899380);
1039 }
1040 }
1041 }
1042 } else {
1043 if (d5) {
1044 if (d3) {
1045 if (d1) {
1046 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1047 z2 = d5 + d3;
1048 z4 = d5 + d1;
1049 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
1050
1051 tmp1 = MULTIPLY(d5, FIX_2_053119869);
1052 tmp2 = MULTIPLY(d3, FIX_3_072711026);
1053 tmp3 = MULTIPLY(d1, FIX_1_501321110);
1054 z1 = MULTIPLY(-d1, FIX_0_899976223);
1055 z2 = MULTIPLY(-z2, FIX_2_562915447);
1056 z3 = MULTIPLY(-d3, FIX_1_961570560);
1057 z4 = MULTIPLY(-z4, FIX_0_390180644);
1058
1059 z3 += z5;
1060 z4 += z5;
1061
1062 tmp0 = z1 + z3;
1063 tmp1 += z2 + z4;
1064 tmp2 += z2 + z3;
1065 tmp3 += z1 + z4;
1066 } else {
1067 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1068 z2 = d5 + d3;
1069
1070 z5 = MULTIPLY(z2, FIX_1_175875602);
1071 tmp1 = MULTIPLY(d5, FIX_1_662939225);
1072 z4 = MULTIPLY(-d5, FIX_0_390180644);
1073 z2 = MULTIPLY(-z2, FIX_1_387039845);
1074 tmp2 = MULTIPLY(d3, FIX_1_111140466);
1075 z3 = MULTIPLY(-d3, FIX_1_961570560);
1076
1077 tmp0 = z3 + z5;
1078 tmp1 += z2;
1079 tmp2 += z2;
1080 tmp3 = z4 + z5;
1081 }
1082 } else {
1083 if (d1) {
1084 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1085 z4 = d5 + d1;
1086
1087 z5 = MULTIPLY(z4, FIX_1_175875602);
1088 z1 = MULTIPLY(-d1, FIX_0_899976223);
1089 tmp3 = MULTIPLY(d1, FIX_0_601344887);
1090 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
1091 z2 = MULTIPLY(-d5, FIX_2_562915447);
1092 z4 = MULTIPLY(z4, FIX_0_785694958);
1093
1094 tmp0 = z1 + z5;
1095 tmp1 += z4;
1096 tmp2 = z2 + z5;
1097 tmp3 += z4;
1098 } else {
1099 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1100 tmp0 = MULTIPLY(d5, FIX_1_175875602);
1101 tmp1 = MULTIPLY(d5, FIX_0_275899380);
1102 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
1103 tmp3 = MULTIPLY(d5, FIX_0_785694958);
1104 }
1105 }
1106 } else {
1107 if (d3) {
1108 if (d1) {
1109 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1110 z5 = d1 + d3;
1111 tmp3 = MULTIPLY(d1, FIX_0_211164243);
1112 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
1113 z1 = MULTIPLY(d1, FIX_1_061594337);
1114 z2 = MULTIPLY(-d3, FIX_2_172734803);
1115 z4 = MULTIPLY(z5, FIX_0_785694958);
1116 z5 = MULTIPLY(z5, FIX_1_175875602);
1117
1118 tmp0 = z1 - z4;
1119 tmp1 = z2 + z4;
1120 tmp2 += z5;
1121 tmp3 += z5;
1122 } else {
1123 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1124 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
1125 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
1126 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
1127 tmp3 = MULTIPLY(d3, FIX_1_175875602);
1128 }
1129 } else {
1130 if (d1) {
1131 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1132 tmp0 = MULTIPLY(d1, FIX_0_275899380);
1133 tmp1 = MULTIPLY(d1, FIX_0_785694958);
1134 tmp2 = MULTIPLY(d1, FIX_1_175875602);
1135 tmp3 = MULTIPLY(d1, FIX_1_387039845);
1136 } else {
1137 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1138 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1139 }
1140 }
1141 }
1142 }
1143
1144 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1145
1146 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
1147 CONST_BITS+PASS1_BITS+3);
1148 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
1149 CONST_BITS+PASS1_BITS+3);
1150 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
1151 CONST_BITS+PASS1_BITS+3);
1152 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
1153 CONST_BITS+PASS1_BITS+3);
1154 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
1155 CONST_BITS+PASS1_BITS+3);
1156 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
1157 CONST_BITS+PASS1_BITS+3);
1158 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
1159 CONST_BITS+PASS1_BITS+3);
1160 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
1161 CONST_BITS+PASS1_BITS+3);
1162
1163 dataptr++; /* advance pointer to next column */
1164 }
1165 }
1166
1167