Mercurial > libavcodec.hg
comparison jrevdct.c @ 0:986e461dc072 libavcodec
Initial revision
author | glantau |
---|---|
date | Sun, 22 Jul 2001 14:18:56 +0000 |
parents | |
children | 23723a0ebd24 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:986e461dc072 |
---|---|
1 /* | |
2 * jrevdct.c | |
3 * | |
4 * Copyright (C) 1991, 1992, Thomas G. Lane. | |
5 * This file is part of the Independent JPEG Group's software. | |
6 * For conditions of distribution and use, see the accompanying README file. | |
7 * | |
8 * This file contains the basic inverse-DCT transformation subroutine. | |
9 * | |
10 * This implementation is based on an algorithm described in | |
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
14 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
15 * We use their alternate method with 12 multiplies and 32 adds. | |
16 * The advantage of this method is that no data path contains more than one | |
17 * multiplication; this allows a very simple and accurate implementation in | |
18 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
19 * | |
20 * I've made lots of modifications to attempt to take advantage of the | |
21 * sparse nature of the DCT matrices we're getting. Although the logic | |
22 * is cumbersome, it's straightforward and the resulting code is much | |
23 * faster. | |
24 * | |
25 * A better way to do this would be to pass in the DCT block as a sparse | |
26 * matrix, perhaps with the different cases encoded. | |
27 */ | |
28 #include "common.h" | |
29 #include "dsputil.h" | |
30 | |
31 #define EIGHT_BIT_SAMPLES /* selects PASS1_BITS == 2 and allows the 16-bit MULTIPLY variants further down */ | |
32 | |
33 #define DCTSIZE 8 /* edge length of a block; the #if check below rejects any other value */ | |
34 #define DCTSIZE2 64 /* DCTSIZE squared: number of coefficients in one DCTBLOCK */ | |
35 | |
36 #define GLOBAL /* expands to nothing */ | |
37 | |
38 #define RIGHT_SHIFT(x, n) ((x) >> (n)) /* plain >>; DESCALE assumes this rounds toward minus infinity for negative x */ | |
39 | |
40 typedef DCTELEM DCTBLOCK[DCTSIZE2]; /* one block of DCTSIZE2 coefficients; DCTELEM is not defined in this file (presumably from the included headers) */ | |
41 | |
42 #define CONST_BITS 13 /* fractional bits kept in the fixed-point multiplier constants (CONST_SCALE == 1 << CONST_BITS) */ | |
43 | |
44 /* | |
45 * This routine is specialized to the case DCTSIZE = 8. | |
46 */ | |
47 | |
48 #if DCTSIZE != 8 | |
49 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err: stops compilation when DCTSIZE is not 8 */ | |
50 #endif | |
51 | |
52 | |
53 /* | |
54 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT | |
55 * on each column. Direct algorithms are also available, but they are | |
56 * much more complex and seem not to be any faster when reduced to code. | |
57 * | |
58 * The poop on this scaling stuff is as follows: | |
59 * | |
60 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
61 * larger than the true IDCT outputs. The final outputs are therefore | |
62 * a factor of N larger than desired; since N=8 this can be cured by | |
63 * a simple right shift at the end of the algorithm. The advantage of | |
64 * this arrangement is that we save two multiplications per 1-D IDCT, | |
65 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
66 * | |
67 * We have to do addition and subtraction of the integer inputs, which | |
68 * is no problem, and multiplication by fractional constants, which is | |
69 * a problem to do in integer arithmetic. We multiply all the constants | |
70 * by CONST_SCALE and convert them to integer constants (thus retaining | |
71 * CONST_BITS bits of precision in the constants). After doing a | |
72 * multiplication we have to divide the product by CONST_SCALE, with proper | |
73 * rounding, to produce the correct output. This division can be done | |
74 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
75 * as long as possible so that partial sums can be added together with | |
76 * full fractional precision. | |
77 * | |
78 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
79 * they are represented to better-than-integral precision. These outputs | |
80 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
81 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
82 * intermediate int32 array would be needed.) | |
83 * | |
84 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
85 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
86 * shows that the values given below are the most effective. | |
87 */ | |
88 | |
89 #ifdef EIGHT_BIT_SAMPLES | |
90 #define PASS1_BITS 2 /* extra fractional bits carried by the pass-1 (row) outputs */ | |
91 #else | |
92 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ | |
93 #endif | |
94 | |
95 #define ONE ((INT32) 1) /* typed 1 so that shifts of it are done in INT32 */ | |
96 | |
97 #define CONST_SCALE (ONE << CONST_BITS) /* scale factor applied to the fractional constants */ | |
98 | |
99 /* Convert a positive real constant to an integer scaled by CONST_SCALE. | |
100 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, | |
101 * you will pay a significant penalty in run time. In that case, figure | |
102 * the correct integer constant values and insert them by hand. | |
103 */ | |
104 | |
105 /* FIX() is no longer used: all the constants are precomputed (see the FIX_* defines). Adding 0.5 before the cast rounds a positive x to nearest. */ | |
106 #define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) | |
107 | |
108 /* Descale and correctly round an INT32 value that's scaled by N bits. | |
109 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding | |
110 * the fudge factor is correct for either sign of X. | |
111 */ | |
112 | |
113 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) /* n must be >= 1 (rounding term is ONE << (n-1)); n is expanded twice */ | |
114 | |
115 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. | |
116 * For 8-bit samples with the recommended scaling, all the variable | |
117 * and constant values involved are no more than 16 bits wide, so a | |
118 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; | |
119 * this provides a useful speedup on many machines. | |
120 * There is no way to specify a 16x16->32 multiply in portable C, but | |
121 * some C compilers will do the right thing if you provide the correct | |
122 * combination of casts. | |
123 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. | |
124 */ | |
125 | |
126 #ifdef EIGHT_BIT_SAMPLES | |
127 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ | |
128 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) | |
129 #endif /* SHORTxSHORT_32 */ | |
130 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ | |
131 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) | |
132 #endif /* SHORTxLCONST_32; note: defining both variant switches redefines MULTIPLY with a different body */ | |
133 #endif /* EIGHT_BIT_SAMPLES */ | |
134 | |
135 #ifndef MULTIPLY /* default definition: used when neither variant above was selected */ | |
136 #define MULTIPLY(var,const) ((var) * (const)) | |
137 #endif | |
138 | |
139 | |
140 /* | |
141 Unlike our decoder, where the FIX values are approximated, the exact values are | |
142 needed here; otherwise successive P-frames drift too much with reference-frame coding. | |
143 Each FIX_a_b below is the real constant a.b times 8192 (CONST_BITS == 13), rounded to nearest. */ | |
144 #define FIX_0_211164243 1730 | |
145 #define FIX_0_275899380 2260 | |
146 #define FIX_0_298631336 2446 | |
147 #define FIX_0_390180644 3196 | |
148 #define FIX_0_509795579 4176 | |
149 #define FIX_0_541196100 4433 | |
150 #define FIX_0_601344887 4926 | |
151 #define FIX_0_765366865 6270 | |
152 #define FIX_0_785694958 6436 | |
153 #define FIX_0_899976223 7373 | |
154 #define FIX_1_061594337 8697 | |
155 #define FIX_1_111140466 9102 | |
156 #define FIX_1_175875602 9633 | |
157 #define FIX_1_306562965 10703 | |
158 #define FIX_1_387039845 11363 | |
159 #define FIX_1_451774981 11893 | |
160 #define FIX_1_501321110 12299 | |
161 #define FIX_1_662939225 13623 | |
162 #define FIX_1_847759065 15137 | |
163 #define FIX_1_961570560 16069 | |
164 #define FIX_2_053119869 16819 | |
165 #define FIX_2_172734803 17799 | |
166 #define FIX_2_562915447 20995 | |
167 #define FIX_3_072711026 25172 | |
168 | |
169 /* | |
170 * Perform the inverse DCT on one block of coefficients. | |
171 */ | |
172 | |
173 void j_rev_dct(DCTBLOCK data) | |
174 { | |
175 INT32 tmp0, tmp1, tmp2, tmp3; | |
176 INT32 tmp10, tmp11, tmp12, tmp13; | |
177 INT32 z1, z2, z3, z4, z5; | |
178 INT32 d0, d1, d2, d3, d4, d5, d6, d7; | |
179 register DCTELEM *dataptr; | |
180 int rowctr; | |
181 | |
182 /* Pass 1: process rows. */ | |
183 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
184 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
185 | |
186 dataptr = data; | |
187 | |
188 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
189 /* Due to quantization, we will usually find that many of the input | |
190 * coefficients are zero, especially the AC terms. We can exploit this | |
191 * by short-circuiting the IDCT calculation for any row in which all | |
192 * the AC terms are zero. In that case each output is equal to the | |
193 * DC coefficient (with scale factor as needed). | |
194 * With typical images and quantization tables, half or more of the | |
195 * row DCT calculations can be simplified this way. | |
196 */ | |
197 | |
198 register int *idataptr = (int*)dataptr; | |
199 | |
200 d0 = dataptr[0]; | |
201 d1 = dataptr[1]; | |
202 d2 = dataptr[2]; | |
203 d3 = dataptr[3]; | |
204 d4 = dataptr[4]; | |
205 d5 = dataptr[5]; | |
206 d6 = dataptr[6]; | |
207 d7 = dataptr[7]; | |
208 | |
209 if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) { | |
210 /* AC terms all zero */ | |
211 if (d0) { | |
212 /* Compute a 32 bit value to assign. */ | |
213 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
214 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
215 | |
216 idataptr[0] = v; | |
217 idataptr[1] = v; | |
218 idataptr[2] = v; | |
219 idataptr[3] = v; | |
220 } | |
221 | |
222 dataptr += DCTSIZE; /* advance pointer to next row */ | |
223 continue; | |
224 } | |
225 | |
226 /* Even part: reverse the even part of the forward DCT. */ | |
227 /* The rotator is sqrt(2)*c(-6). */ | |
228 { | |
229 if (d6) { | |
230 if (d4) { | |
231 if (d2) { | |
232 if (d0) { | |
233 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
234 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
235 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
236 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
237 | |
238 tmp0 = (d0 + d4) << CONST_BITS; | |
239 tmp1 = (d0 - d4) << CONST_BITS; | |
240 | |
241 tmp10 = tmp0 + tmp3; | |
242 tmp13 = tmp0 - tmp3; | |
243 tmp11 = tmp1 + tmp2; | |
244 tmp12 = tmp1 - tmp2; | |
245 } else { | |
246 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ | |
247 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
248 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
249 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
250 | |
251 tmp0 = d4 << CONST_BITS; | |
252 | |
253 tmp10 = tmp0 + tmp3; | |
254 tmp13 = tmp0 - tmp3; | |
255 tmp11 = tmp2 - tmp0; | |
256 tmp12 = -(tmp0 + tmp2); | |
257 } | |
258 } else { | |
259 if (d0) { | |
260 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
261 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
262 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
263 | |
264 tmp0 = (d0 + d4) << CONST_BITS; | |
265 tmp1 = (d0 - d4) << CONST_BITS; | |
266 | |
267 tmp10 = tmp0 + tmp3; | |
268 tmp13 = tmp0 - tmp3; | |
269 tmp11 = tmp1 + tmp2; | |
270 tmp12 = tmp1 - tmp2; | |
271 } else { | |
272 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ | |
273 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
274 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
275 | |
276 tmp0 = d4 << CONST_BITS; | |
277 | |
278 tmp10 = tmp0 + tmp3; | |
279 tmp13 = tmp0 - tmp3; | |
280 tmp11 = tmp2 - tmp0; | |
281 tmp12 = -(tmp0 + tmp2); | |
282 } | |
283 } | |
284 } else { | |
285 if (d2) { | |
286 if (d0) { | |
287 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ | |
288 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
289 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
290 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
291 | |
292 tmp0 = d0 << CONST_BITS; | |
293 | |
294 tmp10 = tmp0 + tmp3; | |
295 tmp13 = tmp0 - tmp3; | |
296 tmp11 = tmp0 + tmp2; | |
297 tmp12 = tmp0 - tmp2; | |
298 } else { | |
299 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ | |
300 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
301 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
302 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
303 | |
304 tmp10 = tmp3; | |
305 tmp13 = -tmp3; | |
306 tmp11 = tmp2; | |
307 tmp12 = -tmp2; | |
308 } | |
309 } else { | |
310 if (d0) { | |
311 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ | |
312 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
313 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
314 | |
315 tmp0 = d0 << CONST_BITS; | |
316 | |
317 tmp10 = tmp0 + tmp3; | |
318 tmp13 = tmp0 - tmp3; | |
319 tmp11 = tmp0 + tmp2; | |
320 tmp12 = tmp0 - tmp2; | |
321 } else { | |
322 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ | |
323 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
324 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
325 | |
326 tmp10 = tmp3; | |
327 tmp13 = -tmp3; | |
328 tmp11 = tmp2; | |
329 tmp12 = -tmp2; | |
330 } | |
331 } | |
332 } | |
333 } else { | |
334 if (d4) { | |
335 if (d2) { | |
336 if (d0) { | |
337 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
338 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
339 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
340 | |
341 tmp0 = (d0 + d4) << CONST_BITS; | |
342 tmp1 = (d0 - d4) << CONST_BITS; | |
343 | |
344 tmp10 = tmp0 + tmp3; | |
345 tmp13 = tmp0 - tmp3; | |
346 tmp11 = tmp1 + tmp2; | |
347 tmp12 = tmp1 - tmp2; | |
348 } else { | |
349 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ | |
350 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
351 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
352 | |
353 tmp0 = d4 << CONST_BITS; | |
354 | |
355 tmp10 = tmp0 + tmp3; | |
356 tmp13 = tmp0 - tmp3; | |
357 tmp11 = tmp2 - tmp0; | |
358 tmp12 = -(tmp0 + tmp2); | |
359 } | |
360 } else { | |
361 if (d0) { | |
362 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
363 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
364 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
365 } else { | |
366 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ | |
367 tmp10 = tmp13 = d4 << CONST_BITS; | |
368 tmp11 = tmp12 = -tmp10; | |
369 } | |
370 } | |
371 } else { | |
372 if (d2) { | |
373 if (d0) { | |
374 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ | |
375 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
376 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
377 | |
378 tmp0 = d0 << CONST_BITS; | |
379 | |
380 tmp10 = tmp0 + tmp3; | |
381 tmp13 = tmp0 - tmp3; | |
382 tmp11 = tmp0 + tmp2; | |
383 tmp12 = tmp0 - tmp2; | |
384 } else { | |
385 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ | |
386 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
387 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
388 | |
389 tmp10 = tmp3; | |
390 tmp13 = -tmp3; | |
391 tmp11 = tmp2; | |
392 tmp12 = -tmp2; | |
393 } | |
394 } else { | |
395 if (d0) { | |
396 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ | |
397 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; | |
398 } else { | |
399 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ | |
400 tmp10 = tmp13 = tmp11 = tmp12 = 0; | |
401 } | |
402 } | |
403 } | |
404 } | |
405 | |
406 /* Odd part per figure 8; the matrix is unitary and hence its | |
407 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
408 */ | |
409 | |
410 if (d7) { | |
411 if (d5) { | |
412 if (d3) { | |
413 if (d1) { | |
414 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
415 z1 = d7 + d1; | |
416 z2 = d5 + d3; | |
417 z3 = d7 + d3; | |
418 z4 = d5 + d1; | |
419 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
420 | |
421 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
422 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
423 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
424 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
425 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
426 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
427 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
428 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
429 | |
430 z3 += z5; | |
431 z4 += z5; | |
432 | |
433 tmp0 += z1 + z3; | |
434 tmp1 += z2 + z4; | |
435 tmp2 += z2 + z3; | |
436 tmp3 += z1 + z4; | |
437 } else { | |
438 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
439 z2 = d5 + d3; | |
440 z3 = d7 + d3; | |
441 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
442 | |
443 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
444 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
445 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
446 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
447 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
448 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
449 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
450 | |
451 z3 += z5; | |
452 z4 += z5; | |
453 | |
454 tmp0 += z1 + z3; | |
455 tmp1 += z2 + z4; | |
456 tmp2 += z2 + z3; | |
457 tmp3 = z1 + z4; | |
458 } | |
459 } else { | |
460 if (d1) { | |
461 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
462 z1 = d7 + d1; | |
463 z4 = d5 + d1; | |
464 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); | |
465 | |
466 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
467 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
468 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
469 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
470 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
471 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
472 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
473 | |
474 z3 += z5; | |
475 z4 += z5; | |
476 | |
477 tmp0 += z1 + z3; | |
478 tmp1 += z2 + z4; | |
479 tmp2 = z2 + z3; | |
480 tmp3 += z1 + z4; | |
481 } else { | |
482 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
483 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
484 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
485 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
486 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
487 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
488 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
489 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
490 | |
491 z3 += z5; | |
492 z4 += z5; | |
493 | |
494 tmp0 += z3; | |
495 tmp1 += z4; | |
496 tmp2 = z2 + z3; | |
497 tmp3 = z1 + z4; | |
498 } | |
499 } | |
500 } else { | |
501 if (d3) { | |
502 if (d1) { | |
503 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
504 z1 = d7 + d1; | |
505 z3 = d7 + d3; | |
506 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
507 | |
508 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
509 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
510 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
511 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
512 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
513 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
514 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
515 | |
516 z3 += z5; | |
517 z4 += z5; | |
518 | |
519 tmp0 += z1 + z3; | |
520 tmp1 = z2 + z4; | |
521 tmp2 += z2 + z3; | |
522 tmp3 += z1 + z4; | |
523 } else { | |
524 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
525 z3 = d7 + d3; | |
526 | |
527 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
528 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
529 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
530 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
531 z5 = MULTIPLY(z3, FIX_1_175875602); | |
532 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
533 | |
534 tmp0 += z3; | |
535 tmp1 = z2 + z5; | |
536 tmp2 += z3; | |
537 tmp3 = z1 + z5; | |
538 } | |
539 } else { | |
540 if (d1) { | |
541 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
542 z1 = d7 + d1; | |
543 z5 = MULTIPLY(z1, FIX_1_175875602); | |
544 | |
545 z1 = MULTIPLY(z1, FIX_0_275899380); | |
546 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
547 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
548 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
549 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
550 | |
551 tmp0 += z1; | |
552 tmp1 = z4 + z5; | |
553 tmp2 = z3 + z5; | |
554 tmp3 += z1; | |
555 } else { | |
556 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
557 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
558 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
559 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
560 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
561 } | |
562 } | |
563 } | |
564 } else { | |
565 if (d5) { | |
566 if (d3) { | |
567 if (d1) { | |
568 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
569 z2 = d5 + d3; | |
570 z4 = d5 + d1; | |
571 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
572 | |
573 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
574 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
575 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
576 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
577 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
578 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
579 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
580 | |
581 z3 += z5; | |
582 z4 += z5; | |
583 | |
584 tmp0 = z1 + z3; | |
585 tmp1 += z2 + z4; | |
586 tmp2 += z2 + z3; | |
587 tmp3 += z1 + z4; | |
588 } else { | |
589 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
590 z2 = d5 + d3; | |
591 | |
592 z5 = MULTIPLY(z2, FIX_1_175875602); | |
593 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
594 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
595 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
596 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
597 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
598 | |
599 tmp0 = z3 + z5; | |
600 tmp1 += z2; | |
601 tmp2 += z2; | |
602 tmp3 = z4 + z5; | |
603 } | |
604 } else { | |
605 if (d1) { | |
606 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
607 z4 = d5 + d1; | |
608 | |
609 z5 = MULTIPLY(z4, FIX_1_175875602); | |
610 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
611 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
612 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
613 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
614 z4 = MULTIPLY(z4, FIX_0_785694958); | |
615 | |
616 tmp0 = z1 + z5; | |
617 tmp1 += z4; | |
618 tmp2 = z2 + z5; | |
619 tmp3 += z4; | |
620 } else { | |
621 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
622 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
623 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
624 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
625 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
626 } | |
627 } | |
628 } else { | |
629 if (d3) { | |
630 if (d1) { | |
631 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
632 z5 = d1 + d3; | |
633 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
634 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
635 z1 = MULTIPLY(d1, FIX_1_061594337); | |
636 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
637 z4 = MULTIPLY(z5, FIX_0_785694958); | |
638 z5 = MULTIPLY(z5, FIX_1_175875602); | |
639 | |
640 tmp0 = z1 - z4; | |
641 tmp1 = z2 + z4; | |
642 tmp2 += z5; | |
643 tmp3 += z5; | |
644 } else { | |
645 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
646 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
647 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
648 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
649 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
650 } | |
651 } else { | |
652 if (d1) { | |
653 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
654 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
655 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
656 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
657 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
658 } else { | |
659 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
660 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
661 } | |
662 } | |
663 } | |
664 } | |
665 } | |
666 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
667 | |
668 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
669 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
670 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
671 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
672 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
673 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
674 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
675 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
676 | |
677 dataptr += DCTSIZE; /* advance pointer to next row */ | |
678 } | |
679 | |
680 /* Pass 2: process columns. */ | |
681 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
682 /* and also undo the PASS1_BITS scaling. */ | |
683 | |
684 dataptr = data; | |
685 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
686 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
687 * However, the row calculation has created many nonzero AC terms, so the | |
688 * simplification applies less often (typically 5% to 10% of the time). | |
689 * On machines with very fast multiplication, it's possible that the | |
690 * test takes more time than it's worth. In that case this section | |
691 * may be commented out. | |
692 */ | |
693 | |
694 d0 = dataptr[DCTSIZE*0]; | |
695 d1 = dataptr[DCTSIZE*1]; | |
696 d2 = dataptr[DCTSIZE*2]; | |
697 d3 = dataptr[DCTSIZE*3]; | |
698 d4 = dataptr[DCTSIZE*4]; | |
699 d5 = dataptr[DCTSIZE*5]; | |
700 d6 = dataptr[DCTSIZE*6]; | |
701 d7 = dataptr[DCTSIZE*7]; | |
702 | |
703 /* Even part: reverse the even part of the forward DCT. */ | |
704 /* The rotator is sqrt(2)*c(-6). */ | |
705 if (d6) { | |
706 if (d4) { | |
707 if (d2) { | |
708 if (d0) { | |
709 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
710 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
711 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
712 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
713 | |
714 tmp0 = (d0 + d4) << CONST_BITS; | |
715 tmp1 = (d0 - d4) << CONST_BITS; | |
716 | |
717 tmp10 = tmp0 + tmp3; | |
718 tmp13 = tmp0 - tmp3; | |
719 tmp11 = tmp1 + tmp2; | |
720 tmp12 = tmp1 - tmp2; | |
721 } else { | |
722 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ | |
723 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
724 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
725 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
726 | |
727 tmp0 = d4 << CONST_BITS; | |
728 | |
729 tmp10 = tmp0 + tmp3; | |
730 tmp13 = tmp0 - tmp3; | |
731 tmp11 = tmp2 - tmp0; | |
732 tmp12 = -(tmp0 + tmp2); | |
733 } | |
734 } else { | |
735 if (d0) { | |
736 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
737 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
738 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
739 | |
740 tmp0 = (d0 + d4) << CONST_BITS; | |
741 tmp1 = (d0 - d4) << CONST_BITS; | |
742 | |
743 tmp10 = tmp0 + tmp3; | |
744 tmp13 = tmp0 - tmp3; | |
745 tmp11 = tmp1 + tmp2; | |
746 tmp12 = tmp1 - tmp2; | |
747 } else { | |
748 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ | |
749 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
750 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
751 | |
752 tmp0 = d4 << CONST_BITS; | |
753 | |
754 tmp10 = tmp0 + tmp3; | |
755 tmp13 = tmp0 - tmp3; | |
756 tmp11 = tmp2 - tmp0; | |
757 tmp12 = -(tmp0 + tmp2); | |
758 } | |
759 } | |
760 } else { | |
761 if (d2) { | |
762 if (d0) { | |
763 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ | |
764 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
765 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
766 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
767 | |
768 tmp0 = d0 << CONST_BITS; | |
769 | |
770 tmp10 = tmp0 + tmp3; | |
771 tmp13 = tmp0 - tmp3; | |
772 tmp11 = tmp0 + tmp2; | |
773 tmp12 = tmp0 - tmp2; | |
774 } else { | |
775 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ | |
776 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
777 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
778 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
779 | |
780 tmp10 = tmp3; | |
781 tmp13 = -tmp3; | |
782 tmp11 = tmp2; | |
783 tmp12 = -tmp2; | |
784 } | |
785 } else { | |
786 if (d0) { | |
787 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ | |
788 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
789 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
790 | |
791 tmp0 = d0 << CONST_BITS; | |
792 | |
793 tmp10 = tmp0 + tmp3; | |
794 tmp13 = tmp0 - tmp3; | |
795 tmp11 = tmp0 + tmp2; | |
796 tmp12 = tmp0 - tmp2; | |
797 } else { | |
798 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ | |
799 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
800 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
801 | |
802 tmp10 = tmp3; | |
803 tmp13 = -tmp3; | |
804 tmp11 = tmp2; | |
805 tmp12 = -tmp2; | |
806 } | |
807 } | |
808 } | |
809 } else { | |
810 if (d4) { | |
811 if (d2) { | |
812 if (d0) { | |
813 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
814 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
815 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
816 | |
817 tmp0 = (d0 + d4) << CONST_BITS; | |
818 tmp1 = (d0 - d4) << CONST_BITS; | |
819 | |
820 tmp10 = tmp0 + tmp3; | |
821 tmp13 = tmp0 - tmp3; | |
822 tmp11 = tmp1 + tmp2; | |
823 tmp12 = tmp1 - tmp2; | |
824 } else { | |
825 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ | |
826 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
827 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
828 | |
829 tmp0 = d4 << CONST_BITS; | |
830 | |
831 tmp10 = tmp0 + tmp3; | |
832 tmp13 = tmp0 - tmp3; | |
833 tmp11 = tmp2 - tmp0; | |
834 tmp12 = -(tmp0 + tmp2); | |
835 } | |
836 } else { | |
837 if (d0) { | |
838 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
839 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
840 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
841 } else { | |
842 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ | |
843 tmp10 = tmp13 = d4 << CONST_BITS; | |
844 tmp11 = tmp12 = -tmp10; | |
845 } | |
846 } | |
847 } else { | |
848 if (d2) { | |
849 if (d0) { | |
850 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ | |
851 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
852 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
853 | |
854 tmp0 = d0 << CONST_BITS; | |
855 | |
856 tmp10 = tmp0 + tmp3; | |
857 tmp13 = tmp0 - tmp3; | |
858 tmp11 = tmp0 + tmp2; | |
859 tmp12 = tmp0 - tmp2; | |
860 } else { | |
861 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ | |
862 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
863 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
864 | |
865 tmp10 = tmp3; | |
866 tmp13 = -tmp3; | |
867 tmp11 = tmp2; | |
868 tmp12 = -tmp2; | |
869 } | |
870 } else { | |
871 if (d0) { | |
872 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ | |
873 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; | |
874 } else { | |
875 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ | |
876 tmp10 = tmp13 = tmp11 = tmp12 = 0; | |
877 } | |
878 } | |
879 } | |
880 } | |
881 | |
882 /* Odd part per figure 8; the matrix is unitary and hence its | |
883 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
884 */ | |
885 if (d7) { | |
886 if (d5) { | |
887 if (d3) { | |
888 if (d1) { | |
889 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
890 z1 = d7 + d1; | |
891 z2 = d5 + d3; | |
892 z3 = d7 + d3; | |
893 z4 = d5 + d1; | |
894 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
895 | |
896 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
897 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
898 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
899 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
900 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
901 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
902 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
903 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
904 | |
905 z3 += z5; | |
906 z4 += z5; | |
907 | |
908 tmp0 += z1 + z3; | |
909 tmp1 += z2 + z4; | |
910 tmp2 += z2 + z3; | |
911 tmp3 += z1 + z4; | |
912 } else { | |
913 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
914 z1 = d7; | |
915 z2 = d5 + d3; | |
916 z3 = d7 + d3; | |
917 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
918 | |
919 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
920 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
921 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
922 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
923 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
924 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
925 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
926 | |
927 z3 += z5; | |
928 z4 += z5; | |
929 | |
930 tmp0 += z1 + z3; | |
931 tmp1 += z2 + z4; | |
932 tmp2 += z2 + z3; | |
933 tmp3 = z1 + z4; | |
934 } | |
935 } else { | |
936 if (d1) { | |
937 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
938 z1 = d7 + d1; | |
939 z2 = d5; | |
940 z3 = d7; | |
941 z4 = d5 + d1; | |
942 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
943 | |
944 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
945 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
946 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
947 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
948 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
949 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
950 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
951 | |
952 z3 += z5; | |
953 z4 += z5; | |
954 | |
955 tmp0 += z1 + z3; | |
956 tmp1 += z2 + z4; | |
957 tmp2 = z2 + z3; | |
958 tmp3 += z1 + z4; | |
959 } else { | |
960 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
961 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
962 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
963 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
964 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
965 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
966 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
967 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
968 | |
969 z3 += z5; | |
970 z4 += z5; | |
971 | |
972 tmp0 += z3; | |
973 tmp1 += z4; | |
974 tmp2 = z2 + z3; | |
975 tmp3 = z1 + z4; | |
976 } | |
977 } | |
978 } else { | |
979 if (d3) { | |
980 if (d1) { | |
981 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
982 z1 = d7 + d1; | |
983 z3 = d7 + d3; | |
984 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
985 | |
986 tmp0 = MULTIPLY(d7, FIX_0_298631336); | |
987 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
988 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
989 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
990 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
991 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
992 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
993 | |
994 z3 += z5; | |
995 z4 += z5; | |
996 | |
997 tmp0 += z1 + z3; | |
998 tmp1 = z2 + z4; | |
999 tmp2 += z2 + z3; | |
1000 tmp3 += z1 + z4; | |
1001 } else { | |
1002 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
1003 z3 = d7 + d3; | |
1004 | |
1005 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
1006 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
1007 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
1008 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
1009 z5 = MULTIPLY(z3, FIX_1_175875602); | |
1010 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
1011 | |
1012 tmp0 += z3; | |
1013 tmp1 = z2 + z5; | |
1014 tmp2 += z3; | |
1015 tmp3 = z1 + z5; | |
1016 } | |
1017 } else { | |
1018 if (d1) { | |
1019 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
1020 z1 = d7 + d1; | |
1021 z5 = MULTIPLY(z1, FIX_1_175875602); | |
1022 | |
1023 z1 = MULTIPLY(z1, FIX_0_275899380); | |
1024 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
1025 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
1026 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
1027 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
1028 | |
1029 tmp0 += z1; | |
1030 tmp1 = z4 + z5; | |
1031 tmp2 = z3 + z5; | |
1032 tmp3 += z1; | |
1033 } else { | |
1034 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
1035 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
1036 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
1037 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
1038 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
1039 } | |
1040 } | |
1041 } | |
1042 } else { | |
1043 if (d5) { | |
1044 if (d3) { | |
1045 if (d1) { | |
1046 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
1047 z2 = d5 + d3; | |
1048 z4 = d5 + d1; | |
1049 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
1050 | |
1051 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
1052 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
1053 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
1054 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
1055 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
1056 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
1057 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
1058 | |
1059 z3 += z5; | |
1060 z4 += z5; | |
1061 | |
1062 tmp0 = z1 + z3; | |
1063 tmp1 += z2 + z4; | |
1064 tmp2 += z2 + z3; | |
1065 tmp3 += z1 + z4; | |
1066 } else { | |
1067 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
1068 z2 = d5 + d3; | |
1069 | |
1070 z5 = MULTIPLY(z2, FIX_1_175875602); | |
1071 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
1072 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
1073 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
1074 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
1075 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
1076 | |
1077 tmp0 = z3 + z5; | |
1078 tmp1 += z2; | |
1079 tmp2 += z2; | |
1080 tmp3 = z4 + z5; | |
1081 } | |
1082 } else { | |
1083 if (d1) { | |
1084 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
1085 z4 = d5 + d1; | |
1086 | |
1087 z5 = MULTIPLY(z4, FIX_1_175875602); | |
1088 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
1089 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
1090 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
1091 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
1092 z4 = MULTIPLY(z4, FIX_0_785694958); | |
1093 | |
1094 tmp0 = z1 + z5; | |
1095 tmp1 += z4; | |
1096 tmp2 = z2 + z5; | |
1097 tmp3 += z4; | |
1098 } else { | |
1099 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
1100 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
1101 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
1102 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
1103 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
1104 } | |
1105 } | |
1106 } else { | |
1107 if (d3) { | |
1108 if (d1) { | |
1109 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
1110 z5 = d1 + d3; | |
1111 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
1112 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
1113 z1 = MULTIPLY(d1, FIX_1_061594337); | |
1114 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
1115 z4 = MULTIPLY(z5, FIX_0_785694958); | |
1116 z5 = MULTIPLY(z5, FIX_1_175875602); | |
1117 | |
1118 tmp0 = z1 - z4; | |
1119 tmp1 = z2 + z4; | |
1120 tmp2 += z5; | |
1121 tmp3 += z5; | |
1122 } else { | |
1123 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
1124 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
1125 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
1126 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
1127 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
1128 } | |
1129 } else { | |
1130 if (d1) { | |
1131 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
1132 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
1133 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
1134 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
1135 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
1136 } else { | |
1137 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
1138 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
1139 } | |
1140 } | |
1141 } | |
1142 } | |
1143 | |
1144 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1145 | |
1146 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, | |
1147 CONST_BITS+PASS1_BITS+3); | |
1148 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, | |
1149 CONST_BITS+PASS1_BITS+3); | |
1150 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, | |
1151 CONST_BITS+PASS1_BITS+3); | |
1152 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, | |
1153 CONST_BITS+PASS1_BITS+3); | |
1154 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, | |
1155 CONST_BITS+PASS1_BITS+3); | |
1156 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, | |
1157 CONST_BITS+PASS1_BITS+3); | |
1158 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, | |
1159 CONST_BITS+PASS1_BITS+3); | |
1160 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, | |
1161 CONST_BITS+PASS1_BITS+3); | |
1162 | |
1163 dataptr++; /* advance pointer to next column */ | |
1164 } | |
1165 } | |
1166 | |
1167 |