Mercurial > libavcodec.hg
annotate jrevdct.c @ 6017:e1404acccac3 libavcodec
Actually return with an error condition if we're being asked to deal with too
many reference frames. Also check max num ref frames against our internal
ref buffer sizes.
Part of fix for roundup issue 281
author | heydowns |
---|---|
date | Fri, 14 Dec 2007 05:48:27 +0000 |
parents | 9b98e18a1b1c |
children | f7cbb7733146 |
rev | line source |
---|---|
0 | 1 /* |
2 * jrevdct.c | |
3 * | |
4 * This file is part of the Independent JPEG Group's software. | |
3669
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
5 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
6 * The authors make NO WARRANTY or representation, either express or implied, |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
7 * with respect to this software, its quality, accuracy, merchantability, or |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
8 * fitness for a particular purpose. This software is provided "AS IS", and |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
9 * you, its user, assume the entire risk as to its quality and accuracy. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
10 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
11 * This software is copyright (C) 1991, 1992, Thomas G. Lane. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
12 * All Rights Reserved except as specified below. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
13 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
14 * Permission is hereby granted to use, copy, modify, and distribute this |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
15 * software (or portions thereof) for any purpose, without fee, subject to |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
16 * these conditions: |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
17 * (1) If any part of the source code for this software is distributed, then |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
18 * this README file must be included, with this copyright and no-warranty |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
19 * notice unaltered; and any additions, deletions, or changes to the original |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
20 * files must be clearly indicated in accompanying documentation. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
21 * (2) If only executable code is distributed, then the accompanying |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
22 * documentation must state that "this software is based in part on the work |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
23 * of the Independent JPEG Group". |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
24 * (3) Permission for use of this software is granted only if the user accepts |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
25 * full responsibility for any undesirable consequences; the authors accept |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
26 * NO LIABILITY for damages of any kind. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
27 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
28 * These conditions apply to any software derived from or based on the IJG |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
29 * code, not just to the unmodified library. If you use our work, you ought |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
30 * to acknowledge us. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
31 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
32 * Permission is NOT granted for the use of any IJG author's name or company |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
33 * name in advertising or publicity relating to this software or products |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
34 * derived from it. This software may be referred to only as "the Independent |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
35 * JPEG Group's software". |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
36 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
37 * We specifically permit and encourage the use of this software as the basis |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
38 * of commercial products, provided that all warranty or liability claims are |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
39 * assumed by the product vendor. |
0 | 40 * |
41 * This file contains the basic inverse-DCT transformation subroutine. | |
42 * | |
43 * This implementation is based on an algorithm described in | |
44 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
45 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
46 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
47 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
48 * We use their alternate method with 12 multiplies and 32 adds. | |
49 * The advantage of this method is that no data path contains more than one | |
50 * multiplication; this allows a very simple and accurate implementation in | |
51 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
2967 | 52 * |
0 | 53 * I've made lots of modifications to attempt to take advantage of the |
54 * sparse nature of the DCT matrices we're getting. Although the logic | |
55 * is cumbersome, it's straightforward and the resulting code is much | |
56 * faster. | |
57 * | |
58 * A better way to do this would be to pass in the DCT block as a sparse | |
59 * matrix, perhaps with the different cases encoded. | |
60 */ | |
2967 | 61 |
1106 | 62 /** |
63 * @file jrevdct.c | |
64 * Independent JPEG Group's LLM idct. | |
65 */ | |
2967 | 66 |
0 | 67 #include "common.h" |
68 #include "dsputil.h" | |
69 | |
70 #define EIGHT_BIT_SAMPLES | |
71 | |
72 #define DCTSIZE 8 | |
73 #define DCTSIZE2 64 | |
74 | |
75 #define GLOBAL | |
76 | |
77 #define RIGHT_SHIFT(x, n) ((x) >> (n)) | |
78 | |
79 typedef DCTELEM DCTBLOCK[DCTSIZE2]; | |
80 | |
81 #define CONST_BITS 13 | |
82 | |
83 /* | |
84 * This routine is specialized to the case DCTSIZE = 8. | |
85 */ | |
86 | |
87 #if DCTSIZE != 8 | |
88 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | |
89 #endif | |
90 | |
91 | |
92 /* | |
93 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT | |
94 * on each column. Direct algorithms are also available, but they are | |
95 * much more complex and seem not to be any faster when reduced to code. | |
96 * | |
97 * The details of the scaling are as follows: | |
98 * | |
99 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
100 * larger than the true IDCT outputs. The final outputs are therefore | |
101 * a factor of N larger than desired; since N=8 this can be cured by | |
102 * a simple right shift at the end of the algorithm. The advantage of | |
103 * this arrangement is that we save two multiplications per 1-D IDCT, | |
104 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
105 * | |
106 * We have to do addition and subtraction of the integer inputs, which | |
107 * is no problem, and multiplication by fractional constants, which is | |
108 * a problem to do in integer arithmetic. We multiply all the constants | |
109 * by CONST_SCALE and convert them to integer constants (thus retaining | |
110 * CONST_BITS bits of precision in the constants). After doing a | |
111 * multiplication we have to divide the product by CONST_SCALE, with proper | |
112 * rounding, to produce the correct output. This division can be done | |
113 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
114 * as long as possible so that partial sums can be added together with | |
115 * full fractional precision. | |
116 * | |
117 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
118 * they are represented to better-than-integral precision. These outputs | |
119 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
120 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
121 * intermediate int32 array would be needed.) | |
122 * | |
123 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
124 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
125 * shows that the values given below are the most effective. | |
126 */ | |
127 | |
128 #ifdef EIGHT_BIT_SAMPLES | |
129 #define PASS1_BITS 2 | |
130 #else | |
2979 | 131 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
0 | 132 #endif |
133 | |
2979 | 134 #define ONE ((int32_t) 1) |
0 | 135 |
136 #define CONST_SCALE (ONE << CONST_BITS) | |
137 | |
138 /* Convert a positive real constant to an integer scaled by CONST_SCALE. | |
139 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, | |
140 * you will pay a significant penalty in run time. In that case, figure | |
141 * the correct integer constant values and insert them by hand. | |
142 */ | |
143 | |
144 /* FIX is no longer used: all the scaled constants are precomputed (see FIX_* below). */ | |
2979 | 145 #define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) |
0 | 146 |
1064 | 147 /* Descale and correctly round an int32_t value that's scaled by N bits. |
0 | 148 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding |
149 * the fudge factor is correct for either sign of X. | |
150 */ | |
151 | |
152 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) | |
153 | |
1064 | 154 /* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. |
0 | 155 * For 8-bit samples with the recommended scaling, all the variable |
156 * and constant values involved are no more than 16 bits wide, so a | |
157 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; | |
158 * this provides a useful speedup on many machines. | |
159 * There is no way to specify a 16x16->32 multiply in portable C, but | |
160 * some C compilers will do the right thing if you provide the correct | |
161 * combination of casts. | |
162 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. | |
163 */ | |
164 | |
165 #ifdef EIGHT_BIT_SAMPLES | |
2979 | 166 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
1064 | 167 #define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) |
0 | 168 #endif |
2979 | 169 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
1064 | 170 #define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) |
0 | 171 #endif |
172 #endif | |
173 | |
2979 | 174 #ifndef MULTIPLY /* default definition */ |
0 | 175 #define MULTIPLY(var,const) ((var) * (const)) |
176 #endif | |
177 | |
178 | |
2967 | 179 /* |
0 | 180 Unlike our decoder, where the FIX constants are approximated, we need exact |
2967 | 181 values here; otherwise successive P-frames drift too much with reference frame coding. |
0 | 182 */ |
183 #define FIX_0_211164243 1730 | |
184 #define FIX_0_275899380 2260 | |
185 #define FIX_0_298631336 2446 | |
186 #define FIX_0_390180644 3196 | |
187 #define FIX_0_509795579 4176 | |
188 #define FIX_0_541196100 4433 | |
189 #define FIX_0_601344887 4926 | |
190 #define FIX_0_765366865 6270 | |
191 #define FIX_0_785694958 6436 | |
192 #define FIX_0_899976223 7373 | |
193 #define FIX_1_061594337 8697 | |
194 #define FIX_1_111140466 9102 | |
195 #define FIX_1_175875602 9633 | |
196 #define FIX_1_306562965 10703 | |
197 #define FIX_1_387039845 11363 | |
198 #define FIX_1_451774981 11893 | |
199 #define FIX_1_501321110 12299 | |
200 #define FIX_1_662939225 13623 | |
201 #define FIX_1_847759065 15137 | |
202 #define FIX_1_961570560 16069 | |
203 #define FIX_2_053119869 16819 | |
204 #define FIX_2_172734803 17799 | |
205 #define FIX_2_562915447 20995 | |
206 #define FIX_3_072711026 25172 | |
207 | |
208 /* | |
209 * Perform the inverse DCT on one block of coefficients. | |
210 */ | |
211 | |
212 void j_rev_dct(DCTBLOCK data) | |
213 { | |
1064 | 214 int32_t tmp0, tmp1, tmp2, tmp3; |
215 int32_t tmp10, tmp11, tmp12, tmp13; | |
216 int32_t z1, z2, z3, z4, z5; | |
217 int32_t d0, d1, d2, d3, d4, d5, d6, d7; | |
0 | 218 register DCTELEM *dataptr; |
219 int rowctr; | |
2967 | 220 |
0 | 221 /* Pass 1: process rows. */ |
222 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
223 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
224 | |
225 dataptr = data; | |
226 | |
227 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
228 /* Due to quantization, we will usually find that many of the input | |
229 * coefficients are zero, especially the AC terms. We can exploit this | |
230 * by short-circuiting the IDCT calculation for any row in which all | |
231 * the AC terms are zero. In that case each output is equal to the | |
232 * DC coefficient (with scale factor as needed). | |
233 * With typical images and quantization tables, half or more of the | |
234 * row DCT calculations can be simplified this way. | |
235 */ | |
236 | |
237 register int *idataptr = (int*)dataptr; | |
238 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
239 /* WARNING: we do the same permutation as MMX idct to simplify the |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
240 video core */ |
0 | 241 d0 = dataptr[0]; |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
242 d2 = dataptr[1]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
243 d4 = dataptr[2]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
244 d6 = dataptr[3]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
245 d1 = dataptr[4]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
246 d3 = dataptr[5]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
247 d5 = dataptr[6]; |
0 | 248 d7 = dataptr[7]; |
249 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
250 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { |
0 | 251 /* AC terms all zero */ |
252 if (d0) { | |
2979 | 253 /* Compute a 32 bit value to assign. */ |
254 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
255 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
2967 | 256 |
2979 | 257 idataptr[0] = v; |
258 idataptr[1] = v; | |
259 idataptr[2] = v; | |
260 idataptr[3] = v; | |
0 | 261 } |
2967 | 262 |
2979 | 263 dataptr += DCTSIZE; /* advance pointer to next row */ |
0 | 264 continue; |
265 } | |
266 | |
267 /* Even part: reverse the even part of the forward DCT. */ | |
268 /* The rotator is sqrt(2)*c(-6). */ | |
269 { | |
270 if (d6) { | |
2979 | 271 if (d2) { |
272 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
273 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
274 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
275 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
0 | 276 |
2979 | 277 tmp0 = (d0 + d4) << CONST_BITS; |
278 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 279 |
2979 | 280 tmp10 = tmp0 + tmp3; |
281 tmp13 = tmp0 - tmp3; | |
282 tmp11 = tmp1 + tmp2; | |
283 tmp12 = tmp1 - tmp2; | |
284 } else { | |
285 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
286 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
287 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
0 | 288 |
2979 | 289 tmp0 = (d0 + d4) << CONST_BITS; |
290 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 291 |
2979 | 292 tmp10 = tmp0 + tmp3; |
293 tmp13 = tmp0 - tmp3; | |
294 tmp11 = tmp1 + tmp2; | |
295 tmp12 = tmp1 - tmp2; | |
296 } | |
2263 | 297 } else { |
2979 | 298 if (d2) { |
299 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
300 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
301 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
0 | 302 |
2979 | 303 tmp0 = (d0 + d4) << CONST_BITS; |
304 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 305 |
2979 | 306 tmp10 = tmp0 + tmp3; |
307 tmp13 = tmp0 - tmp3; | |
308 tmp11 = tmp1 + tmp2; | |
309 tmp12 = tmp1 - tmp2; | |
310 } else { | |
311 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
312 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
313 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
314 } | |
0 | 315 } |
316 | |
317 /* Odd part per figure 8; the matrix is unitary and hence its | |
318 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
319 */ | |
320 | |
321 if (d7) { | |
2979 | 322 if (d5) { |
323 if (d3) { | |
324 if (d1) { | |
325 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
326 z1 = d7 + d1; | |
327 z2 = d5 + d3; | |
328 z3 = d7 + d3; | |
329 z4 = d5 + d1; | |
330 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
2967 | 331 |
2979 | 332 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
333 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
334 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
335 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
336 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
337 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
338 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
339 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 340 |
2979 | 341 z3 += z5; |
342 z4 += z5; | |
2967 | 343 |
2979 | 344 tmp0 += z1 + z3; |
345 tmp1 += z2 + z4; | |
346 tmp2 += z2 + z3; | |
347 tmp3 += z1 + z4; | |
348 } else { | |
349 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
350 z2 = d5 + d3; | |
351 z3 = d7 + d3; | |
352 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
2967 | 353 |
2979 | 354 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
355 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
356 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
357 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
358 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
359 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
360 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
2967 | 361 |
2979 | 362 z3 += z5; |
363 z4 += z5; | |
2967 | 364 |
2979 | 365 tmp0 += z1 + z3; |
366 tmp1 += z2 + z4; | |
367 tmp2 += z2 + z3; | |
368 tmp3 = z1 + z4; | |
369 } | |
370 } else { | |
371 if (d1) { | |
372 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
373 z1 = d7 + d1; | |
374 z4 = d5 + d1; | |
375 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); | |
2967 | 376 |
2979 | 377 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
378 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
379 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
380 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
381 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
382 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
383 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 384 |
2979 | 385 z3 += z5; |
386 z4 += z5; | |
2967 | 387 |
2979 | 388 tmp0 += z1 + z3; |
389 tmp1 += z2 + z4; | |
390 tmp2 = z2 + z3; | |
391 tmp3 += z1 + z4; | |
392 } else { | |
393 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
394 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
395 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
396 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
397 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
398 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
399 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
400 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
2967 | 401 |
2979 | 402 z3 += z5; |
403 z4 += z5; | |
2967 | 404 |
2979 | 405 tmp0 += z3; |
406 tmp1 += z4; | |
407 tmp2 = z2 + z3; | |
408 tmp3 = z1 + z4; | |
409 } | |
410 } | |
411 } else { | |
412 if (d3) { | |
413 if (d1) { | |
414 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
415 z1 = d7 + d1; | |
416 z3 = d7 + d3; | |
417 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
2967 | 418 |
2979 | 419 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
420 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
421 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
422 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
423 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
424 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
425 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
2967 | 426 |
2979 | 427 z3 += z5; |
428 z4 += z5; | |
2967 | 429 |
2979 | 430 tmp0 += z1 + z3; |
431 tmp1 = z2 + z4; | |
432 tmp2 += z2 + z3; | |
433 tmp3 += z1 + z4; | |
434 } else { | |
435 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
436 z3 = d7 + d3; | |
2967 | 437 |
2979 | 438 tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
439 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
440 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
441 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
442 z5 = MULTIPLY(z3, FIX_1_175875602); | |
443 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
2967 | 444 |
2979 | 445 tmp0 += z3; |
446 tmp1 = z2 + z5; | |
447 tmp2 += z3; | |
448 tmp3 = z1 + z5; | |
449 } | |
450 } else { | |
451 if (d1) { | |
452 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
453 z1 = d7 + d1; | |
454 z5 = MULTIPLY(z1, FIX_1_175875602); | |
0 | 455 |
2979 | 456 z1 = MULTIPLY(z1, FIX_0_275899380); |
457 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
458 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
459 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
460 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
0 | 461 |
2979 | 462 tmp0 += z1; |
463 tmp1 = z4 + z5; | |
464 tmp2 = z3 + z5; | |
465 tmp3 += z1; | |
466 } else { | |
467 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
468 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
469 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
470 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
471 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
472 } | |
473 } | |
474 } | |
0 | 475 } else { |
2979 | 476 if (d5) { |
477 if (d3) { | |
478 if (d1) { | |
479 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
480 z2 = d5 + d3; | |
481 z4 = d5 + d1; | |
482 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
2967 | 483 |
2979 | 484 tmp1 = MULTIPLY(d5, FIX_2_053119869); |
485 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
486 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
487 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
488 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
489 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
490 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 491 |
2979 | 492 z3 += z5; |
493 z4 += z5; | |
2967 | 494 |
2979 | 495 tmp0 = z1 + z3; |
496 tmp1 += z2 + z4; | |
497 tmp2 += z2 + z3; | |
498 tmp3 += z1 + z4; | |
499 } else { | |
500 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
501 z2 = d5 + d3; | |
2967 | 502 |
2979 | 503 z5 = MULTIPLY(z2, FIX_1_175875602); |
504 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
505 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
506 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
507 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
508 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
2967 | 509 |
2979 | 510 tmp0 = z3 + z5; |
511 tmp1 += z2; | |
512 tmp2 += z2; | |
513 tmp3 = z4 + z5; | |
514 } | |
515 } else { | |
516 if (d1) { | |
517 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
518 z4 = d5 + d1; | |
2967 | 519 |
2979 | 520 z5 = MULTIPLY(z4, FIX_1_175875602); |
521 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
522 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
523 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
524 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
525 z4 = MULTIPLY(z4, FIX_0_785694958); | |
2967 | 526 |
2979 | 527 tmp0 = z1 + z5; |
528 tmp1 += z4; | |
529 tmp2 = z2 + z5; | |
530 tmp3 += z4; | |
531 } else { | |
532 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
533 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
534 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
535 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
536 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
537 } | |
538 } | |
539 } else { | |
540 if (d3) { | |
541 if (d1) { | |
542 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
543 z5 = d1 + d3; | |
544 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
545 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
546 z1 = MULTIPLY(d1, FIX_1_061594337); | |
547 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
548 z4 = MULTIPLY(z5, FIX_0_785694958); | |
549 z5 = MULTIPLY(z5, FIX_1_175875602); | |
2967 | 550 |
2979 | 551 tmp0 = z1 - z4; |
552 tmp1 = z2 + z4; | |
553 tmp2 += z5; | |
554 tmp3 += z5; | |
555 } else { | |
556 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
557 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
558 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
559 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
560 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
561 } | |
562 } else { | |
563 if (d1) { | |
564 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
565 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
566 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
567 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
568 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
569 } else { | |
570 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
571 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
572 } | |
573 } | |
574 } | |
0 | 575 } |
576 } | |
577 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
578 | |
579 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
580 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
581 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
582 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
583 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
584 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
585 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
586 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
587 | |
2979 | 588 dataptr += DCTSIZE; /* advance pointer to next row */ |
0 | 589 } |
590 | |
591 /* Pass 2: process columns. */ | |
592 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
593 /* and also undo the PASS1_BITS scaling. */ | |
594 | |
595 dataptr = data; | |
596 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
597 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
598 * However, the row calculation has created many nonzero AC terms, so the | |
599 * simplification applies less often (typically 5% to 10% of the time). | |
600 * On machines with very fast multiplication, it's possible that the | |
601 * test takes more time than it's worth. In that case this section | |
602 * may be commented out. | |
603 */ | |
604 | |
605 d0 = dataptr[DCTSIZE*0]; | |
606 d1 = dataptr[DCTSIZE*1]; | |
607 d2 = dataptr[DCTSIZE*2]; | |
608 d3 = dataptr[DCTSIZE*3]; | |
609 d4 = dataptr[DCTSIZE*4]; | |
610 d5 = dataptr[DCTSIZE*5]; | |
611 d6 = dataptr[DCTSIZE*6]; | |
612 d7 = dataptr[DCTSIZE*7]; | |
613 | |
614 /* Even part: reverse the even part of the forward DCT. */ | |
615 /* The rotator is sqrt(2)*c(-6). */ | |
616 if (d6) { | |
2979 | 617 if (d2) { |
618 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
619 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
620 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
621 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
0 | 622 |
2979 | 623 tmp0 = (d0 + d4) << CONST_BITS; |
624 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 625 |
2979 | 626 tmp10 = tmp0 + tmp3; |
627 tmp13 = tmp0 - tmp3; | |
628 tmp11 = tmp1 + tmp2; | |
629 tmp12 = tmp1 - tmp2; | |
630 } else { | |
631 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
632 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
633 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
0 | 634 |
2979 | 635 tmp0 = (d0 + d4) << CONST_BITS; |
636 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 637 |
2979 | 638 tmp10 = tmp0 + tmp3; |
639 tmp13 = tmp0 - tmp3; | |
640 tmp11 = tmp1 + tmp2; | |
641 tmp12 = tmp1 - tmp2; | |
642 } | |
2263 | 643 } else { |
2979 | 644 if (d2) { |
645 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
646 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
647 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
0 | 648 |
2979 | 649 tmp0 = (d0 + d4) << CONST_BITS; |
650 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 651 |
2979 | 652 tmp10 = tmp0 + tmp3; |
653 tmp13 = tmp0 - tmp3; | |
654 tmp11 = tmp1 + tmp2; | |
655 tmp12 = tmp1 - tmp2; | |
656 } else { | |
657 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
658 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
659 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
660 } | |
0 | 661 } |
662 | |
663 /* Odd part per figure 8; the matrix is unitary and hence its | |
664 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
665 */ | |
666 if (d7) { | |
2979 | 667 if (d5) { |
668 if (d3) { | |
669 if (d1) { | |
670 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
671 z1 = d7 + d1; | |
672 z2 = d5 + d3; | |
673 z3 = d7 + d3; | |
674 z4 = d5 + d1; | |
675 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
2967 | 676 |
2979 | 677 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
678 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
679 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
680 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
681 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
682 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
683 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
684 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 685 |
2979 | 686 z3 += z5; |
687 z4 += z5; | |
2967 | 688 |
2979 | 689 tmp0 += z1 + z3; |
690 tmp1 += z2 + z4; | |
691 tmp2 += z2 + z3; | |
692 tmp3 += z1 + z4; | |
693 } else { | |
694 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
695 z1 = d7; | |
696 z2 = d5 + d3; | |
697 z3 = d7 + d3; | |
698 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
2967 | 699 |
2979 | 700 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
701 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
702 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
703 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
704 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
705 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
706 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
2967 | 707 |
2979 | 708 z3 += z5; |
709 z4 += z5; | |
2967 | 710 |
2979 | 711 tmp0 += z1 + z3; |
712 tmp1 += z2 + z4; | |
713 tmp2 += z2 + z3; | |
714 tmp3 = z1 + z4; | |
715 } | |
716 } else { | |
717 if (d1) { | |
718 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
719 z1 = d7 + d1; | |
720 z2 = d5; | |
721 z3 = d7; | |
722 z4 = d5 + d1; | |
723 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
2967 | 724 |
2979 | 725 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
726 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
727 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
728 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
729 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
730 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
731 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 732 |
2979 | 733 z3 += z5; |
734 z4 += z5; | |
2967 | 735 |
2979 | 736 tmp0 += z1 + z3; |
737 tmp1 += z2 + z4; | |
738 tmp2 = z2 + z3; | |
739 tmp3 += z1 + z4; | |
740 } else { | |
741 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
742 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
743 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
744 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
745 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
746 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
747 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
748 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
2967 | 749 |
2979 | 750 z3 += z5; |
751 z4 += z5; | |
2967 | 752 |
2979 | 753 tmp0 += z3; |
754 tmp1 += z4; | |
755 tmp2 = z2 + z3; | |
756 tmp3 = z1 + z4; | |
757 } | |
758 } | |
759 } else { | |
760 if (d3) { | |
761 if (d1) { | |
762 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
763 z1 = d7 + d1; | |
764 z3 = d7 + d3; | |
765 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
2967 | 766 |
2979 | 767 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
768 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
769 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
770 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
771 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
772 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
773 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
2967 | 774 |
2979 | 775 z3 += z5; |
776 z4 += z5; | |
2967 | 777 |
2979 | 778 tmp0 += z1 + z3; |
779 tmp1 = z2 + z4; | |
780 tmp2 += z2 + z3; | |
781 tmp3 += z1 + z4; | |
782 } else { | |
783 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
784 z3 = d7 + d3; | |
2967 | 785 |
2979 | 786 tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
787 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
788 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
789 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
790 z5 = MULTIPLY(z3, FIX_1_175875602); | |
791 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
2967 | 792 |
2979 | 793 tmp0 += z3; |
794 tmp1 = z2 + z5; | |
795 tmp2 += z3; | |
796 tmp3 = z1 + z5; | |
797 } | |
798 } else { | |
799 if (d1) { | |
800 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
801 z1 = d7 + d1; | |
802 z5 = MULTIPLY(z1, FIX_1_175875602); | |
0 | 803 |
2979 | 804 z1 = MULTIPLY(z1, FIX_0_275899380); |
805 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
806 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
807 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
808 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
0 | 809 |
2979 | 810 tmp0 += z1; |
811 tmp1 = z4 + z5; | |
812 tmp2 = z3 + z5; | |
813 tmp3 += z1; | |
814 } else { | |
815 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
816 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
817 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
818 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
819 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
820 } | |
821 } | |
822 } | |
0 | 823 } else { |
2979 | 824 if (d5) { |
825 if (d3) { | |
826 if (d1) { | |
827 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
828 z2 = d5 + d3; | |
829 z4 = d5 + d1; | |
830 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
2967 | 831 |
2979 | 832 tmp1 = MULTIPLY(d5, FIX_2_053119869); |
833 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
834 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
835 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
836 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
837 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
838 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 839 |
2979 | 840 z3 += z5; |
841 z4 += z5; | |
2967 | 842 |
2979 | 843 tmp0 = z1 + z3; |
844 tmp1 += z2 + z4; | |
845 tmp2 += z2 + z3; | |
846 tmp3 += z1 + z4; | |
847 } else { | |
848 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
849 z2 = d5 + d3; | |
2967 | 850 |
2979 | 851 z5 = MULTIPLY(z2, FIX_1_175875602); |
852 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
853 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
854 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
855 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
856 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
2967 | 857 |
2979 | 858 tmp0 = z3 + z5; |
859 tmp1 += z2; | |
860 tmp2 += z2; | |
861 tmp3 = z4 + z5; | |
862 } | |
863 } else { | |
864 if (d1) { | |
865 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
866 z4 = d5 + d1; | |
2967 | 867 |
2979 | 868 z5 = MULTIPLY(z4, FIX_1_175875602); |
869 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
870 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
871 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
872 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
873 z4 = MULTIPLY(z4, FIX_0_785694958); | |
2967 | 874 |
2979 | 875 tmp0 = z1 + z5; |
876 tmp1 += z4; | |
877 tmp2 = z2 + z5; | |
878 tmp3 += z4; | |
879 } else { | |
880 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
881 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
882 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
883 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
884 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
885 } | |
886 } | |
887 } else { | |
888 if (d3) { | |
889 if (d1) { | |
890 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
891 z5 = d1 + d3; | |
892 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
893 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
894 z1 = MULTIPLY(d1, FIX_1_061594337); | |
895 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
896 z4 = MULTIPLY(z5, FIX_0_785694958); | |
897 z5 = MULTIPLY(z5, FIX_1_175875602); | |
2967 | 898 |
2979 | 899 tmp0 = z1 - z4; |
900 tmp1 = z2 + z4; | |
901 tmp2 += z5; | |
902 tmp3 += z5; | |
903 } else { | |
904 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
905 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
906 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
907 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
908 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
909 } | |
910 } else { | |
911 if (d1) { | |
912 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
913 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
914 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
915 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
916 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
917 } else { | |
918 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
919 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
920 } | |
921 } | |
922 } | |
0 | 923 } |
924 | |
925 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
926 | |
927 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, | |
2979 | 928 CONST_BITS+PASS1_BITS+3); |
0 | 929 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, |
2979 | 930 CONST_BITS+PASS1_BITS+3); |
0 | 931 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, |
2979 | 932 CONST_BITS+PASS1_BITS+3); |
0 | 933 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, |
2979 | 934 CONST_BITS+PASS1_BITS+3); |
0 | 935 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, |
2979 | 936 CONST_BITS+PASS1_BITS+3); |
0 | 937 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, |
2979 | 938 CONST_BITS+PASS1_BITS+3); |
0 | 939 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, |
2979 | 940 CONST_BITS+PASS1_BITS+3); |
0 | 941 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, |
2979 | 942 CONST_BITS+PASS1_BITS+3); |
2967 | 943 |
2979 | 944 dataptr++; /* advance pointer to next column */ |
0 | 945 } |
946 } | |
947 | |
2256 | 948 #undef DCTSIZE |
949 #define DCTSIZE 4 | |
950 #define DCTSTRIDE 8 | |
951 | |
952 void j_rev_dct4(DCTBLOCK data) | |
953 { | |
954 int32_t tmp0, tmp1, tmp2, tmp3; | |
955 int32_t tmp10, tmp11, tmp12, tmp13; | |
956 int32_t z1; | |
957 int32_t d0, d2, d4, d6; | |
958 register DCTELEM *dataptr; | |
959 int rowctr; | |
2262 | 960 |
2256 | 961 /* Pass 1: process rows. */ |
962 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
963 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
964 | |
2262 | 965 data[0] += 4; |
2967 | 966 |
2256 | 967 dataptr = data; |
968 | |
969 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
970 /* Due to quantization, we will usually find that many of the input | |
971 * coefficients are zero, especially the AC terms. We can exploit this | |
972 * by short-circuiting the IDCT calculation for any row in which all | |
973 * the AC terms are zero. In that case each output is equal to the | |
974 * DC coefficient (with scale factor as needed). | |
975 * With typical images and quantization tables, half or more of the | |
976 * row DCT calculations can be simplified this way. | |
977 */ | |
978 | |
979 register int *idataptr = (int*)dataptr; | |
980 | |
981 d0 = dataptr[0]; | |
982 d2 = dataptr[1]; | |
983 d4 = dataptr[2]; | |
984 d6 = dataptr[3]; | |
985 | |
986 if ((d2 | d4 | d6) == 0) { | |
987 /* AC terms all zero */ | |
988 if (d0) { | |
2979 | 989 /* Compute a 32 bit value to assign. */ |
990 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
991 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
2967 | 992 |
2979 | 993 idataptr[0] = v; |
994 idataptr[1] = v; | |
2256 | 995 } |
2967 | 996 |
2979 | 997 dataptr += DCTSTRIDE; /* advance pointer to next row */ |
2256 | 998 continue; |
999 } | |
2967 | 1000 |
2256 | 1001 /* Even part: reverse the even part of the forward DCT. */ |
1002 /* The rotator is sqrt(2)*c(-6). */ | |
1003 if (d6) { | |
2979 | 1004 if (d2) { |
1005 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
1006 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
1007 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
1008 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
2256 | 1009 |
2979 | 1010 tmp0 = (d0 + d4) << CONST_BITS; |
1011 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1012 |
2979 | 1013 tmp10 = tmp0 + tmp3; |
1014 tmp13 = tmp0 - tmp3; | |
1015 tmp11 = tmp1 + tmp2; | |
1016 tmp12 = tmp1 - tmp2; | |
1017 } else { | |
1018 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
1019 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
1020 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
2256 | 1021 |
2979 | 1022 tmp0 = (d0 + d4) << CONST_BITS; |
1023 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1024 |
2979 | 1025 tmp10 = tmp0 + tmp3; |
1026 tmp13 = tmp0 - tmp3; | |
1027 tmp11 = tmp1 + tmp2; | |
1028 tmp12 = tmp1 - tmp2; | |
1029 } | |
2262 | 1030 } else { |
2979 | 1031 if (d2) { |
1032 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
1033 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
1034 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
2256 | 1035 |
2979 | 1036 tmp0 = (d0 + d4) << CONST_BITS; |
1037 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1038 |
2979 | 1039 tmp10 = tmp0 + tmp3; |
1040 tmp13 = tmp0 - tmp3; | |
1041 tmp11 = tmp1 + tmp2; | |
1042 tmp12 = tmp1 - tmp2; | |
1043 } else { | |
1044 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
1045 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
1046 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
1047 } | |
2256 | 1048 } |
1049 | |
1050 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1051 | |
1052 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | |
1053 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | |
1054 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | |
1055 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | |
1056 | |
2979 | 1057 dataptr += DCTSTRIDE; /* advance pointer to next row */ |
2256 | 1058 } |
1059 | |
1060 /* Pass 2: process columns. */ | |
1061 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
1062 /* and also undo the PASS1_BITS scaling. */ | |
1063 | |
1064 dataptr = data; | |
1065 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
1066 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
1067 * However, the row calculation has created many nonzero AC terms, so the | |
1068 * simplification applies less often (typically 5% to 10% of the time). | |
1069 * On machines with very fast multiplication, it's possible that the | |
1070 * test takes more time than it's worth. In that case this section | |
1071 * may be commented out. | |
1072 */ | |
1073 | |
1074 d0 = dataptr[DCTSTRIDE*0]; | |
1075 d2 = dataptr[DCTSTRIDE*1]; | |
1076 d4 = dataptr[DCTSTRIDE*2]; | |
1077 d6 = dataptr[DCTSTRIDE*3]; | |
1078 | |
1079 /* Even part: reverse the even part of the forward DCT. */ | |
1080 /* The rotator is sqrt(2)*c(-6). */ | |
1081 if (d6) { | |
2979 | 1082 if (d2) { |
1083 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
1084 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
1085 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
1086 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
2256 | 1087 |
2979 | 1088 tmp0 = (d0 + d4) << CONST_BITS; |
1089 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1090 |
2979 | 1091 tmp10 = tmp0 + tmp3; |
1092 tmp13 = tmp0 - tmp3; | |
1093 tmp11 = tmp1 + tmp2; | |
1094 tmp12 = tmp1 - tmp2; | |
1095 } else { | |
1096 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
1097 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
1098 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
2256 | 1099 |
2979 | 1100 tmp0 = (d0 + d4) << CONST_BITS; |
1101 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1102 |
2979 | 1103 tmp10 = tmp0 + tmp3; |
1104 tmp13 = tmp0 - tmp3; | |
1105 tmp11 = tmp1 + tmp2; | |
1106 tmp12 = tmp1 - tmp2; | |
1107 } | |
2262 | 1108 } else { |
2979 | 1109 if (d2) { |
1110 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
1111 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
1112 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
2256 | 1113 |
2979 | 1114 tmp0 = (d0 + d4) << CONST_BITS; |
1115 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1116 |
2979 | 1117 tmp10 = tmp0 + tmp3; |
1118 tmp13 = tmp0 - tmp3; | |
1119 tmp11 = tmp1 + tmp2; | |
1120 tmp12 = tmp1 - tmp2; | |
1121 } else { | |
1122 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
1123 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
1124 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
1125 } | |
2256 | 1126 } |
1127 | |
1128 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1129 | |
2262 | 1130 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); |
1131 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); | |
1132 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); | |
1133 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); | |
2967 | 1134 |
2979 | 1135 dataptr++; /* advance pointer to next column */ |
2256 | 1136 } |
1137 } | |
1138 | |
2257 | 1139 void j_rev_dct2(DCTBLOCK data){ |
1140 int d00, d01, d10, d11; | |
1141 | |
1142 data[0] += 4; | |
1143 d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; | |
1144 d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; | |
1145 d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; | |
1146 d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; | |
2967 | 1147 |
2257 | 1148 data[0+0*DCTSTRIDE]= (d00 + d10)>>3; |
1149 data[1+0*DCTSTRIDE]= (d01 + d11)>>3; | |
1150 data[0+1*DCTSTRIDE]= (d00 - d10)>>3; | |
1151 data[1+1*DCTSTRIDE]= (d01 - d11)>>3; | |
1152 } | |
2256 | 1153 |
2259 | 1154 void j_rev_dct1(DCTBLOCK data){ |
1155 data[0] = (data[0] + 4)>>3; | |
1156 } | |
1157 | |
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1158 #undef FIX |
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1159 #undef CONST_BITS |