Mercurial > libavcodec.hg
annotate jrevdct.c @ 11557:53822d92c3f7 libavcodec
Make sure the EC code does not attempt to use inter based concealment if there
is no reference frame available. (this can happen because the EC code will attempt
to use reference frames even for I/IDR frames)
author | michael |
---|---|
date | Tue, 30 Mar 2010 20:46:46 +0000 |
parents | 674090f13019 |
children | 7dd2a45249a9 |
rev | line source |
---|---|
0 | 1 /* |
2 * jrevdct.c | |
3 * | |
4 * This file is part of the Independent JPEG Group's software. | |
3669
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
5 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
6 * The authors make NO WARRANTY or representation, either express or implied, |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
7 * with respect to this software, its quality, accuracy, merchantability, or |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
8 * fitness for a particular purpose. This software is provided "AS IS", and |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
9 * you, its user, assume the entire risk as to its quality and accuracy. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
10 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
11 * This software is copyright (C) 1991, 1992, Thomas G. Lane. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
12 * All Rights Reserved except as specified below. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
13 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
14 * Permission is hereby granted to use, copy, modify, and distribute this |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
15 * software (or portions thereof) for any purpose, without fee, subject to |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
16 * these conditions: |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
17 * (1) If any part of the source code for this software is distributed, then |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
18 * this README file must be included, with this copyright and no-warranty |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
19 * notice unaltered; and any additions, deletions, or changes to the original |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
20 * files must be clearly indicated in accompanying documentation. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
21 * (2) If only executable code is distributed, then the accompanying |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
22 * documentation must state that "this software is based in part on the work |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
23 * of the Independent JPEG Group". |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
24 * (3) Permission for use of this software is granted only if the user accepts |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
25 * full responsibility for any undesirable consequences; the authors accept |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
26 * NO LIABILITY for damages of any kind. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
27 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
28 * These conditions apply to any software derived from or based on the IJG |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
29 * code, not just to the unmodified library. If you use our work, you ought |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
30 * to acknowledge us. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
31 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
32 * Permission is NOT granted for the use of any IJG author's name or company |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
33 * name in advertising or publicity relating to this software or products |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
34 * derived from it. This software may be referred to only as "the Independent |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
35 * JPEG Group's software". |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
36 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
37 * We specifically permit and encourage the use of this software as the basis |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
38 * of commercial products, provided that all warranty or liability claims are |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
39 * assumed by the product vendor. |
0 | 40 * |
41 * This file contains the basic inverse-DCT transformation subroutine. | |
42 * | |
43 * This implementation is based on an algorithm described in | |
44 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | |
45 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | |
46 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | |
47 * The primary algorithm described there uses 11 multiplies and 29 adds. | |
48 * We use their alternate method with 12 multiplies and 32 adds. | |
49 * The advantage of this method is that no data path contains more than one | |
50 * multiplication; this allows a very simple and accurate implementation in | |
51 * scaled fixed-point arithmetic, with a minimal number of shifts. | |
2967 | 52 * |
0 | 53 * I've made lots of modifications to attempt to take advantage of the |
54 * sparse nature of the DCT matrices we're getting. Although the logic | |
55 * is cumbersome, it's straightforward and the resulting code is much | |
56 * faster. | |
57 * | |
58 * A better way to do this would be to pass in the DCT block as a sparse | |
59 * matrix, perhaps with the different cases encoded. | |
60 */ | |
2967 | 61 |
1106 | 62 /** |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
6763
diff
changeset
|
63 * @file libavcodec/jrevdct.c |
1106 | 64 * Independent JPEG Group's LLM idct. |
65 */ | |
2967 | 66 |
6763 | 67 #include "libavutil/common.h" |
0 | 68 #include "dsputil.h" |
69 | |
70 #define EIGHT_BIT_SAMPLES | |
71 | |
72 #define DCTSIZE 8 | |
73 #define DCTSIZE2 64 | |
74 | |
75 #define GLOBAL | |
76 | |
77 #define RIGHT_SHIFT(x, n) ((x) >> (n)) | |
78 | |
79 typedef DCTELEM DCTBLOCK[DCTSIZE2]; | |
80 | |
81 #define CONST_BITS 13 | |
82 | |
83 /* | |
84 * This routine is specialized to the case DCTSIZE = 8. | |
85 */ | |
86 | |
87 #if DCTSIZE != 8 | |
88 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | |
89 #endif | |
90 | |
91 | |
92 /* | |
93 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT | |
94 * on each column. Direct algorithms are also available, but they are | |
95 * much more complex and seem not to be any faster when reduced to code. | |
96 * | |
97 * The poop on this scaling stuff is as follows: | |
98 * | |
99 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) | |
100 * larger than the true IDCT outputs. The final outputs are therefore | |
101 * a factor of N larger than desired; since N=8 this can be cured by | |
102 * a simple right shift at the end of the algorithm. The advantage of | |
103 * this arrangement is that we save two multiplications per 1-D IDCT, | |
104 * because the y0 and y4 inputs need not be divided by sqrt(N). | |
105 * | |
106 * We have to do addition and subtraction of the integer inputs, which | |
107 * is no problem, and multiplication by fractional constants, which is | |
108 * a problem to do in integer arithmetic. We multiply all the constants | |
109 * by CONST_SCALE and convert them to integer constants (thus retaining | |
110 * CONST_BITS bits of precision in the constants). After doing a | |
111 * multiplication we have to divide the product by CONST_SCALE, with proper | |
112 * rounding, to produce the correct output. This division can be done | |
113 * cheaply as a right shift of CONST_BITS bits. We postpone shifting | |
114 * as long as possible so that partial sums can be added together with | |
115 * full fractional precision. | |
116 * | |
117 * The outputs of the first pass are scaled up by PASS1_BITS bits so that | |
118 * they are represented to better-than-integral precision. These outputs | |
119 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | |
120 * with the recommended scaling. (To scale up 12-bit sample data further, an | |
121 * intermediate int32 array would be needed.) | |
122 * | |
123 * To avoid overflow of the 32-bit intermediate results in pass 2, we must | |
124 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | |
125 * shows that the values given below are the most effective. | |
126 */ | |
127 | |
128 #ifdef EIGHT_BIT_SAMPLES | |
129 #define PASS1_BITS 2 | |
130 #else | |
2979 | 131 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
0 | 132 #endif |
133 | |
2979 | 134 #define ONE ((int32_t) 1) |
0 | 135 |
136 #define CONST_SCALE (ONE << CONST_BITS) | |
137 | |
138 /* Convert a positive real constant to an integer scaled by CONST_SCALE. | |
139 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, | |
140 * you will pay a significant penalty in run time. In that case, figure | |
141 * the correct integer constant values and insert them by hand. | |
142 */ | |
143 | |
144 /* Actually FIX is no longer used; all constants are precomputed below. */ | |
2979 | 145 #define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) |
0 | 146 |
1064 | 147 /* Descale and correctly round an int32_t value that's scaled by N bits. |
0 | 148 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding |
149 * the fudge factor is correct for either sign of X. | |
150 */ | |
151 | |
152 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) | |
153 | |
1064 | 154 /* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. |
0 | 155 * For 8-bit samples with the recommended scaling, all the variable |
156 * and constant values involved are no more than 16 bits wide, so a | |
157 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; | |
158 * this provides a useful speedup on many machines. | |
159 * There is no way to specify a 16x16->32 multiply in portable C, but | |
160 * some C compilers will do the right thing if you provide the correct | |
161 * combination of casts. | |
162 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. | |
163 */ | |
164 | |
165 #ifdef EIGHT_BIT_SAMPLES | |
2979 | 166 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
1064 | 167 #define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) |
0 | 168 #endif |
2979 | 169 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
1064 | 170 #define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) |
0 | 171 #endif |
172 #endif | |
173 | |
2979 | 174 #ifndef MULTIPLY /* default definition */ |
0 | 175 #define MULTIPLY(var,const) ((var) * (const)) |
176 #endif | |
177 | |
178 | |
2967 | 179 /* |
0 | 180 Unlike our decoder, where we approximate the FIX constants, we need to use exact |
2967 | 181 ones here, or successive P-frames will drift too much with reference-frame coding. |
0 | 182 */ |
183 #define FIX_0_211164243 1730 | |
184 #define FIX_0_275899380 2260 | |
185 #define FIX_0_298631336 2446 | |
186 #define FIX_0_390180644 3196 | |
187 #define FIX_0_509795579 4176 | |
188 #define FIX_0_541196100 4433 | |
189 #define FIX_0_601344887 4926 | |
190 #define FIX_0_765366865 6270 | |
191 #define FIX_0_785694958 6436 | |
192 #define FIX_0_899976223 7373 | |
193 #define FIX_1_061594337 8697 | |
194 #define FIX_1_111140466 9102 | |
195 #define FIX_1_175875602 9633 | |
196 #define FIX_1_306562965 10703 | |
197 #define FIX_1_387039845 11363 | |
198 #define FIX_1_451774981 11893 | |
199 #define FIX_1_501321110 12299 | |
200 #define FIX_1_662939225 13623 | |
201 #define FIX_1_847759065 15137 | |
202 #define FIX_1_961570560 16069 | |
203 #define FIX_2_053119869 16819 | |
204 #define FIX_2_172734803 17799 | |
205 #define FIX_2_562915447 20995 | |
206 #define FIX_3_072711026 25172 | |
207 | |
208 /* | |
209 * Perform the inverse DCT on one block of coefficients. | |
210 */ | |
211 | |
212 void j_rev_dct(DCTBLOCK data) | |
213 { | |
1064 | 214 int32_t tmp0, tmp1, tmp2, tmp3; |
215 int32_t tmp10, tmp11, tmp12, tmp13; | |
216 int32_t z1, z2, z3, z4, z5; | |
217 int32_t d0, d1, d2, d3, d4, d5, d6, d7; | |
0 | 218 register DCTELEM *dataptr; |
219 int rowctr; | |
2967 | 220 |
0 | 221 /* Pass 1: process rows. */ |
222 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
223 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
224 | |
225 dataptr = data; | |
226 | |
227 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
228 /* Due to quantization, we will usually find that many of the input | |
229 * coefficients are zero, especially the AC terms. We can exploit this | |
230 * by short-circuiting the IDCT calculation for any row in which all | |
231 * the AC terms are zero. In that case each output is equal to the | |
232 * DC coefficient (with scale factor as needed). | |
233 * With typical images and quantization tables, half or more of the | |
234 * row DCT calculations can be simplified this way. | |
235 */ | |
236 | |
237 register int *idataptr = (int*)dataptr; | |
238 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
239 /* WARNING: we do the same permutation as MMX idct to simplify the |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
240 video core */ |
0 | 241 d0 = dataptr[0]; |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
242 d2 = dataptr[1]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
243 d4 = dataptr[2]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
244 d6 = dataptr[3]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
245 d1 = dataptr[4]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
246 d3 = dataptr[5]; |
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
247 d5 = dataptr[6]; |
0 | 248 d7 = dataptr[7]; |
249 | |
36
23723a0ebd24
permuted coefs in normal IDCT to avoid having different cases there
glantau
parents:
0
diff
changeset
|
250 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { |
0 | 251 /* AC terms all zero */ |
252 if (d0) { | |
2979 | 253 /* Compute a 32 bit value to assign. */ |
254 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
255 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
2967 | 256 |
2979 | 257 idataptr[0] = v; |
258 idataptr[1] = v; | |
259 idataptr[2] = v; | |
260 idataptr[3] = v; | |
0 | 261 } |
2967 | 262 |
2979 | 263 dataptr += DCTSIZE; /* advance pointer to next row */ |
0 | 264 continue; |
265 } | |
266 | |
267 /* Even part: reverse the even part of the forward DCT. */ | |
268 /* The rotator is sqrt(2)*c(-6). */ | |
269 { | |
270 if (d6) { | |
2979 | 271 if (d2) { |
272 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
273 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
274 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
275 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
0 | 276 |
2979 | 277 tmp0 = (d0 + d4) << CONST_BITS; |
278 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 279 |
2979 | 280 tmp10 = tmp0 + tmp3; |
281 tmp13 = tmp0 - tmp3; | |
282 tmp11 = tmp1 + tmp2; | |
283 tmp12 = tmp1 - tmp2; | |
284 } else { | |
285 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
286 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
287 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
0 | 288 |
2979 | 289 tmp0 = (d0 + d4) << CONST_BITS; |
290 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 291 |
2979 | 292 tmp10 = tmp0 + tmp3; |
293 tmp13 = tmp0 - tmp3; | |
294 tmp11 = tmp1 + tmp2; | |
295 tmp12 = tmp1 - tmp2; | |
296 } | |
2263 | 297 } else { |
2979 | 298 if (d2) { |
299 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
300 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
301 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
0 | 302 |
2979 | 303 tmp0 = (d0 + d4) << CONST_BITS; |
304 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 305 |
2979 | 306 tmp10 = tmp0 + tmp3; |
307 tmp13 = tmp0 - tmp3; | |
308 tmp11 = tmp1 + tmp2; | |
309 tmp12 = tmp1 - tmp2; | |
310 } else { | |
311 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
312 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
313 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
314 } | |
0 | 315 } |
316 | |
317 /* Odd part per figure 8; the matrix is unitary and hence its | |
318 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
319 */ | |
320 | |
321 if (d7) { | |
2979 | 322 if (d5) { |
323 if (d3) { | |
324 if (d1) { | |
325 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
326 z1 = d7 + d1; | |
327 z2 = d5 + d3; | |
328 z3 = d7 + d3; | |
329 z4 = d5 + d1; | |
330 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
2967 | 331 |
2979 | 332 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
333 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
334 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
335 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
336 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
337 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
338 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
339 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 340 |
2979 | 341 z3 += z5; |
342 z4 += z5; | |
2967 | 343 |
2979 | 344 tmp0 += z1 + z3; |
345 tmp1 += z2 + z4; | |
346 tmp2 += z2 + z3; | |
347 tmp3 += z1 + z4; | |
348 } else { | |
349 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
350 z2 = d5 + d3; | |
351 z3 = d7 + d3; | |
352 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
2967 | 353 |
2979 | 354 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
355 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
356 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
357 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
358 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
359 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
360 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
2967 | 361 |
2979 | 362 z3 += z5; |
363 z4 += z5; | |
2967 | 364 |
2979 | 365 tmp0 += z1 + z3; |
366 tmp1 += z2 + z4; | |
367 tmp2 += z2 + z3; | |
368 tmp3 = z1 + z4; | |
369 } | |
370 } else { | |
371 if (d1) { | |
372 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
373 z1 = d7 + d1; | |
374 z4 = d5 + d1; | |
375 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); | |
2967 | 376 |
2979 | 377 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
378 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
379 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
380 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
381 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
382 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
383 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 384 |
2979 | 385 z3 += z5; |
386 z4 += z5; | |
2967 | 387 |
2979 | 388 tmp0 += z1 + z3; |
389 tmp1 += z2 + z4; | |
390 tmp2 = z2 + z3; | |
391 tmp3 += z1 + z4; | |
392 } else { | |
393 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
394 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
395 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
396 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
397 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
398 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
399 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
400 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
2967 | 401 |
2979 | 402 z3 += z5; |
403 z4 += z5; | |
2967 | 404 |
2979 | 405 tmp0 += z3; |
406 tmp1 += z4; | |
407 tmp2 = z2 + z3; | |
408 tmp3 = z1 + z4; | |
409 } | |
410 } | |
411 } else { | |
412 if (d3) { | |
413 if (d1) { | |
414 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
415 z1 = d7 + d1; | |
416 z3 = d7 + d3; | |
417 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
2967 | 418 |
2979 | 419 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
420 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
421 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
422 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
423 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
424 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
425 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
2967 | 426 |
2979 | 427 z3 += z5; |
428 z4 += z5; | |
2967 | 429 |
2979 | 430 tmp0 += z1 + z3; |
431 tmp1 = z2 + z4; | |
432 tmp2 += z2 + z3; | |
433 tmp3 += z1 + z4; | |
434 } else { | |
435 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
436 z3 = d7 + d3; | |
2967 | 437 |
2979 | 438 tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
439 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
440 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
441 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
442 z5 = MULTIPLY(z3, FIX_1_175875602); | |
443 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
2967 | 444 |
2979 | 445 tmp0 += z3; |
446 tmp1 = z2 + z5; | |
447 tmp2 += z3; | |
448 tmp3 = z1 + z5; | |
449 } | |
450 } else { | |
451 if (d1) { | |
452 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
453 z1 = d7 + d1; | |
454 z5 = MULTIPLY(z1, FIX_1_175875602); | |
0 | 455 |
2979 | 456 z1 = MULTIPLY(z1, FIX_0_275899380); |
457 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
458 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
459 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
460 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
0 | 461 |
2979 | 462 tmp0 += z1; |
463 tmp1 = z4 + z5; | |
464 tmp2 = z3 + z5; | |
465 tmp3 += z1; | |
466 } else { | |
467 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
468 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
469 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
470 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
471 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
472 } | |
473 } | |
474 } | |
0 | 475 } else { |
2979 | 476 if (d5) { |
477 if (d3) { | |
478 if (d1) { | |
479 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
480 z2 = d5 + d3; | |
481 z4 = d5 + d1; | |
482 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
2967 | 483 |
2979 | 484 tmp1 = MULTIPLY(d5, FIX_2_053119869); |
485 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
486 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
487 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
488 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
489 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
490 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 491 |
2979 | 492 z3 += z5; |
493 z4 += z5; | |
2967 | 494 |
2979 | 495 tmp0 = z1 + z3; |
496 tmp1 += z2 + z4; | |
497 tmp2 += z2 + z3; | |
498 tmp3 += z1 + z4; | |
499 } else { | |
500 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
501 z2 = d5 + d3; | |
2967 | 502 |
2979 | 503 z5 = MULTIPLY(z2, FIX_1_175875602); |
504 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
505 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
506 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
507 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
508 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
2967 | 509 |
2979 | 510 tmp0 = z3 + z5; |
511 tmp1 += z2; | |
512 tmp2 += z2; | |
513 tmp3 = z4 + z5; | |
514 } | |
515 } else { | |
516 if (d1) { | |
517 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
518 z4 = d5 + d1; | |
2967 | 519 |
2979 | 520 z5 = MULTIPLY(z4, FIX_1_175875602); |
521 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
522 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
523 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
524 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
525 z4 = MULTIPLY(z4, FIX_0_785694958); | |
2967 | 526 |
2979 | 527 tmp0 = z1 + z5; |
528 tmp1 += z4; | |
529 tmp2 = z2 + z5; | |
530 tmp3 += z4; | |
531 } else { | |
532 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
533 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
534 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
535 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
536 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
537 } | |
538 } | |
539 } else { | |
540 if (d3) { | |
541 if (d1) { | |
542 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
543 z5 = d1 + d3; | |
544 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
545 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
546 z1 = MULTIPLY(d1, FIX_1_061594337); | |
547 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
548 z4 = MULTIPLY(z5, FIX_0_785694958); | |
549 z5 = MULTIPLY(z5, FIX_1_175875602); | |
2967 | 550 |
2979 | 551 tmp0 = z1 - z4; |
552 tmp1 = z2 + z4; | |
553 tmp2 += z5; | |
554 tmp3 += z5; | |
555 } else { | |
556 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
557 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
558 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
559 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
560 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
561 } | |
562 } else { | |
563 if (d1) { | |
564 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
565 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
566 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
567 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
568 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
569 } else { | |
570 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
571 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
572 } | |
573 } | |
574 } | |
0 | 575 } |
576 } | |
577 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
578 | |
579 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); | |
580 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); | |
581 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); | |
582 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); | |
583 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); | |
584 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); | |
585 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); | |
586 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); | |
587 | |
2979 | 588 dataptr += DCTSIZE; /* advance pointer to next row */ |
0 | 589 } |
590 | |
591 /* Pass 2: process columns. */ | |
592 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
593 /* and also undo the PASS1_BITS scaling. */ | |
594 | |
595 dataptr = data; | |
596 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
597 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
598 * However, the row calculation has created many nonzero AC terms, so the | |
599 * simplification applies less often (typically 5% to 10% of the time). | |
600 * On machines with very fast multiplication, it's possible that the | |
601 * test takes more time than it's worth. In that case this section | |
602 * may be commented out. | |
603 */ | |
604 | |
605 d0 = dataptr[DCTSIZE*0]; | |
606 d1 = dataptr[DCTSIZE*1]; | |
607 d2 = dataptr[DCTSIZE*2]; | |
608 d3 = dataptr[DCTSIZE*3]; | |
609 d4 = dataptr[DCTSIZE*4]; | |
610 d5 = dataptr[DCTSIZE*5]; | |
611 d6 = dataptr[DCTSIZE*6]; | |
612 d7 = dataptr[DCTSIZE*7]; | |
613 | |
614 /* Even part: reverse the even part of the forward DCT. */ | |
615 /* The rotator is sqrt(2)*c(-6). */ | |
616 if (d6) { | |
2979 | 617 if (d2) { |
618 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
619 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
620 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
621 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
0 | 622 |
2979 | 623 tmp0 = (d0 + d4) << CONST_BITS; |
624 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 625 |
2979 | 626 tmp10 = tmp0 + tmp3; |
627 tmp13 = tmp0 - tmp3; | |
628 tmp11 = tmp1 + tmp2; | |
629 tmp12 = tmp1 - tmp2; | |
630 } else { | |
631 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
632 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
633 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
0 | 634 |
2979 | 635 tmp0 = (d0 + d4) << CONST_BITS; |
636 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 637 |
2979 | 638 tmp10 = tmp0 + tmp3; |
639 tmp13 = tmp0 - tmp3; | |
640 tmp11 = tmp1 + tmp2; | |
641 tmp12 = tmp1 - tmp2; | |
642 } | |
2263 | 643 } else { |
2979 | 644 if (d2) { |
645 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
646 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
647 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
0 | 648 |
2979 | 649 tmp0 = (d0 + d4) << CONST_BITS; |
650 tmp1 = (d0 - d4) << CONST_BITS; | |
0 | 651 |
2979 | 652 tmp10 = tmp0 + tmp3; |
653 tmp13 = tmp0 - tmp3; | |
654 tmp11 = tmp1 + tmp2; | |
655 tmp12 = tmp1 - tmp2; | |
656 } else { | |
657 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
658 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
659 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
660 } | |
0 | 661 } |
662 | |
663 /* Odd part per figure 8; the matrix is unitary and hence its | |
664 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | |
665 */ | |
666 if (d7) { | |
2979 | 667 if (d5) { |
668 if (d3) { | |
669 if (d1) { | |
670 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ | |
671 z1 = d7 + d1; | |
672 z2 = d5 + d3; | |
673 z3 = d7 + d3; | |
674 z4 = d5 + d1; | |
675 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
2967 | 676 |
2979 | 677 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
678 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
679 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
680 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
681 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
682 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
683 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
684 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 685 |
2979 | 686 z3 += z5; |
687 z4 += z5; | |
2967 | 688 |
2979 | 689 tmp0 += z1 + z3; |
690 tmp1 += z2 + z4; | |
691 tmp2 += z2 + z3; | |
692 tmp3 += z1 + z4; | |
693 } else { | |
694 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ | |
695 z2 = d5 + d3; | |
696 z3 = d7 + d3; | |
697 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); | |
2967 | 698 |
2979 | 699 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
700 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
701 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
702 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
703 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
704 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
705 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
2967 | 706 |
2979 | 707 z3 += z5; |
708 z4 += z5; | |
2967 | 709 |
2979 | 710 tmp0 += z1 + z3; |
711 tmp1 += z2 + z4; | |
712 tmp2 += z2 + z3; | |
713 tmp3 = z1 + z4; | |
714 } | |
715 } else { | |
716 if (d1) { | |
717 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ | |
718 z1 = d7 + d1; | |
719 z3 = d7; | |
720 z4 = d5 + d1; | |
721 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); | |
2967 | 722 |
2979 | 723 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
724 tmp1 = MULTIPLY(d5, FIX_2_053119869); | |
725 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
726 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
727 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
728 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
729 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 730 |
2979 | 731 z3 += z5; |
732 z4 += z5; | |
2967 | 733 |
2979 | 734 tmp0 += z1 + z3; |
735 tmp1 += z2 + z4; | |
736 tmp2 = z2 + z3; | |
737 tmp3 += z1 + z4; | |
738 } else { | |
739 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ | |
740 tmp0 = MULTIPLY(-d7, FIX_0_601344887); | |
741 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
742 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
743 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
744 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
745 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
746 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); | |
2967 | 747 |
2979 | 748 z3 += z5; |
749 z4 += z5; | |
2967 | 750 |
2979 | 751 tmp0 += z3; |
752 tmp1 += z4; | |
753 tmp2 = z2 + z3; | |
754 tmp3 = z1 + z4; | |
755 } | |
756 } | |
757 } else { | |
758 if (d3) { | |
759 if (d1) { | |
760 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ | |
761 z1 = d7 + d1; | |
762 z3 = d7 + d3; | |
763 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); | |
2967 | 764 |
2979 | 765 tmp0 = MULTIPLY(d7, FIX_0_298631336); |
766 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
767 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
768 z1 = MULTIPLY(-z1, FIX_0_899976223); | |
769 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
770 z3 = MULTIPLY(-z3, FIX_1_961570560); | |
771 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
2967 | 772 |
2979 | 773 z3 += z5; |
774 z4 += z5; | |
2967 | 775 |
2979 | 776 tmp0 += z1 + z3; |
777 tmp1 = z2 + z4; | |
778 tmp2 += z2 + z3; | |
779 tmp3 += z1 + z4; | |
780 } else { | |
781 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ | |
782 z3 = d7 + d3; | |
2967 | 783 |
2979 | 784 tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
785 z1 = MULTIPLY(-d7, FIX_0_899976223); | |
786 tmp2 = MULTIPLY(d3, FIX_0_509795579); | |
787 z2 = MULTIPLY(-d3, FIX_2_562915447); | |
788 z5 = MULTIPLY(z3, FIX_1_175875602); | |
789 z3 = MULTIPLY(-z3, FIX_0_785694958); | |
2967 | 790 |
2979 | 791 tmp0 += z3; |
792 tmp1 = z2 + z5; | |
793 tmp2 += z3; | |
794 tmp3 = z1 + z5; | |
795 } | |
796 } else { | |
797 if (d1) { | |
798 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ | |
799 z1 = d7 + d1; | |
800 z5 = MULTIPLY(z1, FIX_1_175875602); | |
0 | 801 |
2979 | 802 z1 = MULTIPLY(z1, FIX_0_275899380); |
803 z3 = MULTIPLY(-d7, FIX_1_961570560); | |
804 tmp0 = MULTIPLY(-d7, FIX_1_662939225); | |
805 z4 = MULTIPLY(-d1, FIX_0_390180644); | |
806 tmp3 = MULTIPLY(d1, FIX_1_111140466); | |
0 | 807 |
2979 | 808 tmp0 += z1; |
809 tmp1 = z4 + z5; | |
810 tmp2 = z3 + z5; | |
811 tmp3 += z1; | |
812 } else { | |
813 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ | |
814 tmp0 = MULTIPLY(-d7, FIX_1_387039845); | |
815 tmp1 = MULTIPLY(d7, FIX_1_175875602); | |
816 tmp2 = MULTIPLY(-d7, FIX_0_785694958); | |
817 tmp3 = MULTIPLY(d7, FIX_0_275899380); | |
818 } | |
819 } | |
820 } | |
0 | 821 } else { |
2979 | 822 if (d5) { |
823 if (d3) { | |
824 if (d1) { | |
825 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ | |
826 z2 = d5 + d3; | |
827 z4 = d5 + d1; | |
828 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); | |
2967 | 829 |
2979 | 830 tmp1 = MULTIPLY(d5, FIX_2_053119869); |
831 tmp2 = MULTIPLY(d3, FIX_3_072711026); | |
832 tmp3 = MULTIPLY(d1, FIX_1_501321110); | |
833 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
834 z2 = MULTIPLY(-z2, FIX_2_562915447); | |
835 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
836 z4 = MULTIPLY(-z4, FIX_0_390180644); | |
2967 | 837 |
2979 | 838 z3 += z5; |
839 z4 += z5; | |
2967 | 840 |
2979 | 841 tmp0 = z1 + z3; |
842 tmp1 += z2 + z4; | |
843 tmp2 += z2 + z3; | |
844 tmp3 += z1 + z4; | |
845 } else { | |
846 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ | |
847 z2 = d5 + d3; | |
2967 | 848 |
2979 | 849 z5 = MULTIPLY(z2, FIX_1_175875602); |
850 tmp1 = MULTIPLY(d5, FIX_1_662939225); | |
851 z4 = MULTIPLY(-d5, FIX_0_390180644); | |
852 z2 = MULTIPLY(-z2, FIX_1_387039845); | |
853 tmp2 = MULTIPLY(d3, FIX_1_111140466); | |
854 z3 = MULTIPLY(-d3, FIX_1_961570560); | |
2967 | 855 |
2979 | 856 tmp0 = z3 + z5; |
857 tmp1 += z2; | |
858 tmp2 += z2; | |
859 tmp3 = z4 + z5; | |
860 } | |
861 } else { | |
862 if (d1) { | |
863 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ | |
864 z4 = d5 + d1; | |
2967 | 865 |
2979 | 866 z5 = MULTIPLY(z4, FIX_1_175875602); |
867 z1 = MULTIPLY(-d1, FIX_0_899976223); | |
868 tmp3 = MULTIPLY(d1, FIX_0_601344887); | |
869 tmp1 = MULTIPLY(-d5, FIX_0_509795579); | |
870 z2 = MULTIPLY(-d5, FIX_2_562915447); | |
871 z4 = MULTIPLY(z4, FIX_0_785694958); | |
2967 | 872 |
2979 | 873 tmp0 = z1 + z5; |
874 tmp1 += z4; | |
875 tmp2 = z2 + z5; | |
876 tmp3 += z4; | |
877 } else { | |
878 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ | |
879 tmp0 = MULTIPLY(d5, FIX_1_175875602); | |
880 tmp1 = MULTIPLY(d5, FIX_0_275899380); | |
881 tmp2 = MULTIPLY(-d5, FIX_1_387039845); | |
882 tmp3 = MULTIPLY(d5, FIX_0_785694958); | |
883 } | |
884 } | |
885 } else { | |
886 if (d3) { | |
887 if (d1) { | |
888 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ | |
889 z5 = d1 + d3; | |
890 tmp3 = MULTIPLY(d1, FIX_0_211164243); | |
891 tmp2 = MULTIPLY(-d3, FIX_1_451774981); | |
892 z1 = MULTIPLY(d1, FIX_1_061594337); | |
893 z2 = MULTIPLY(-d3, FIX_2_172734803); | |
894 z4 = MULTIPLY(z5, FIX_0_785694958); | |
895 z5 = MULTIPLY(z5, FIX_1_175875602); | |
2967 | 896 |
2979 | 897 tmp0 = z1 - z4; |
898 tmp1 = z2 + z4; | |
899 tmp2 += z5; | |
900 tmp3 += z5; | |
901 } else { | |
902 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ | |
903 tmp0 = MULTIPLY(-d3, FIX_0_785694958); | |
904 tmp1 = MULTIPLY(-d3, FIX_1_387039845); | |
905 tmp2 = MULTIPLY(-d3, FIX_0_275899380); | |
906 tmp3 = MULTIPLY(d3, FIX_1_175875602); | |
907 } | |
908 } else { | |
909 if (d1) { | |
910 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ | |
911 tmp0 = MULTIPLY(d1, FIX_0_275899380); | |
912 tmp1 = MULTIPLY(d1, FIX_0_785694958); | |
913 tmp2 = MULTIPLY(d1, FIX_1_175875602); | |
914 tmp3 = MULTIPLY(d1, FIX_1_387039845); | |
915 } else { | |
916 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ | |
917 tmp0 = tmp1 = tmp2 = tmp3 = 0; | |
918 } | |
919 } | |
920 } | |
0 | 921 } |
922 | |
923 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
924 | |
925 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, | |
2979 | 926 CONST_BITS+PASS1_BITS+3); |
0 | 927 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, |
2979 | 928 CONST_BITS+PASS1_BITS+3); |
0 | 929 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, |
2979 | 930 CONST_BITS+PASS1_BITS+3); |
0 | 931 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, |
2979 | 932 CONST_BITS+PASS1_BITS+3); |
0 | 933 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, |
2979 | 934 CONST_BITS+PASS1_BITS+3); |
0 | 935 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, |
2979 | 936 CONST_BITS+PASS1_BITS+3); |
0 | 937 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, |
2979 | 938 CONST_BITS+PASS1_BITS+3); |
0 | 939 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, |
2979 | 940 CONST_BITS+PASS1_BITS+3); |
2967 | 941 |
2979 | 942 dataptr++; /* advance pointer to next column */ |
0 | 943 } |
944 } | |
945 | |
2256 | 946 #undef DCTSIZE |
947 #define DCTSIZE 4 | |
948 #define DCTSTRIDE 8 | |
949 | |
950 void j_rev_dct4(DCTBLOCK data) | |
951 { | |
952 int32_t tmp0, tmp1, tmp2, tmp3; | |
953 int32_t tmp10, tmp11, tmp12, tmp13; | |
954 int32_t z1; | |
955 int32_t d0, d2, d4, d6; | |
956 register DCTELEM *dataptr; | |
957 int rowctr; | |
2262 | 958 |
2256 | 959 /* Pass 1: process rows. */ |
960 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ | |
961 /* furthermore, we scale the results by 2**PASS1_BITS. */ | |
962 | |
2262 | 963 data[0] += 4; |
2967 | 964 |
2256 | 965 dataptr = data; |
966 | |
967 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
968 /* Due to quantization, we will usually find that many of the input | |
969 * coefficients are zero, especially the AC terms. We can exploit this | |
970 * by short-circuiting the IDCT calculation for any row in which all | |
971 * the AC terms are zero. In that case each output is equal to the | |
972 * DC coefficient (with scale factor as needed). | |
973 * With typical images and quantization tables, half or more of the | |
974 * row DCT calculations can be simplified this way. | |
975 */ | |
976 | |
977 register int *idataptr = (int*)dataptr; | |
978 | |
979 d0 = dataptr[0]; | |
980 d2 = dataptr[1]; | |
981 d4 = dataptr[2]; | |
982 d6 = dataptr[3]; | |
983 | |
984 if ((d2 | d4 | d6) == 0) { | |
985 /* AC terms all zero */ | |
986 if (d0) { | |
2979 | 987 /* Compute a 32 bit value to assign. */ |
988 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); | |
989 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); | |
2967 | 990 |
2979 | 991 idataptr[0] = v; |
992 idataptr[1] = v; | |
2256 | 993 } |
2967 | 994 |
2979 | 995 dataptr += DCTSTRIDE; /* advance pointer to next row */ |
2256 | 996 continue; |
997 } | |
2967 | 998 |
2256 | 999 /* Even part: reverse the even part of the forward DCT. */ |
1000 /* The rotator is sqrt(2)*c(-6). */ | |
1001 if (d6) { | |
2979 | 1002 if (d2) { |
1003 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
1004 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
1005 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
1006 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
2256 | 1007 |
2979 | 1008 tmp0 = (d0 + d4) << CONST_BITS; |
1009 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1010 |
2979 | 1011 tmp10 = tmp0 + tmp3; |
1012 tmp13 = tmp0 - tmp3; | |
1013 tmp11 = tmp1 + tmp2; | |
1014 tmp12 = tmp1 - tmp2; | |
1015 } else { | |
1016 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
1017 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
1018 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
2256 | 1019 |
2979 | 1020 tmp0 = (d0 + d4) << CONST_BITS; |
1021 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1022 |
2979 | 1023 tmp10 = tmp0 + tmp3; |
1024 tmp13 = tmp0 - tmp3; | |
1025 tmp11 = tmp1 + tmp2; | |
1026 tmp12 = tmp1 - tmp2; | |
1027 } | |
2262 | 1028 } else { |
2979 | 1029 if (d2) { |
1030 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
1031 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
1032 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
2256 | 1033 |
2979 | 1034 tmp0 = (d0 + d4) << CONST_BITS; |
1035 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1036 |
2979 | 1037 tmp10 = tmp0 + tmp3; |
1038 tmp13 = tmp0 - tmp3; | |
1039 tmp11 = tmp1 + tmp2; | |
1040 tmp12 = tmp1 - tmp2; | |
1041 } else { | |
1042 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
1043 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
1044 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
1045 } | |
2256 | 1046 } |
1047 | |
1048 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1049 | |
1050 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | |
1051 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | |
1052 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | |
1053 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | |
1054 | |
2979 | 1055 dataptr += DCTSTRIDE; /* advance pointer to next row */ |
2256 | 1056 } |
1057 | |
1058 /* Pass 2: process columns. */ | |
1059 /* Note that we must descale the results by a factor of 8 == 2**3, */ | |
1060 /* and also undo the PASS1_BITS scaling. */ | |
1061 | |
1062 dataptr = data; | |
1063 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { | |
1064 /* Columns of zeroes can be exploited in the same way as we did with rows. | |
1065 * However, the row calculation has created many nonzero AC terms, so the | |
1066 * simplification applies less often (typically 5% to 10% of the time). | |
1067 * On machines with very fast multiplication, it's possible that the | |
1068 * test takes more time than it's worth. In that case this section | |
1069 * may be commented out. | |
1070 */ | |
1071 | |
1072 d0 = dataptr[DCTSTRIDE*0]; | |
1073 d2 = dataptr[DCTSTRIDE*1]; | |
1074 d4 = dataptr[DCTSTRIDE*2]; | |
1075 d6 = dataptr[DCTSTRIDE*3]; | |
1076 | |
1077 /* Even part: reverse the even part of the forward DCT. */ | |
1078 /* The rotator is sqrt(2)*c(-6). */ | |
1079 if (d6) { | |
2979 | 1080 if (d2) { |
1081 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | |
1082 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
1083 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
1084 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
2256 | 1085 |
2979 | 1086 tmp0 = (d0 + d4) << CONST_BITS; |
1087 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1088 |
2979 | 1089 tmp10 = tmp0 + tmp3; |
1090 tmp13 = tmp0 - tmp3; | |
1091 tmp11 = tmp1 + tmp2; | |
1092 tmp12 = tmp1 - tmp2; | |
1093 } else { | |
1094 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | |
1095 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
1096 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
2256 | 1097 |
2979 | 1098 tmp0 = (d0 + d4) << CONST_BITS; |
1099 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1100 |
2979 | 1101 tmp10 = tmp0 + tmp3; |
1102 tmp13 = tmp0 - tmp3; | |
1103 tmp11 = tmp1 + tmp2; | |
1104 tmp12 = tmp1 - tmp2; | |
1105 } | |
2262 | 1106 } else { |
2979 | 1107 if (d2) { |
1108 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | |
1109 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
1110 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
2256 | 1111 |
2979 | 1112 tmp0 = (d0 + d4) << CONST_BITS; |
1113 tmp1 = (d0 - d4) << CONST_BITS; | |
2256 | 1114 |
2979 | 1115 tmp10 = tmp0 + tmp3; |
1116 tmp13 = tmp0 - tmp3; | |
1117 tmp11 = tmp1 + tmp2; | |
1118 tmp12 = tmp1 - tmp2; | |
1119 } else { | |
1120 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | |
1121 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | |
1122 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | |
1123 } | |
2256 | 1124 } |
1125 | |
1126 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ | |
1127 | |
2262 | 1128 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); |
1129 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); | |
1130 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); | |
1131 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); | |
2967 | 1132 |
2979 | 1133 dataptr++; /* advance pointer to next column */ |
2256 | 1134 } |
1135 } | |
1136 | |
2257 | 1137 void j_rev_dct2(DCTBLOCK data){ |
1138 int d00, d01, d10, d11; | |
1139 | |
1140 data[0] += 4; | |
1141 d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; | |
1142 d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; | |
1143 d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; | |
1144 d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; | |
2967 | 1145 |
2257 | 1146 data[0+0*DCTSTRIDE]= (d00 + d10)>>3; |
1147 data[1+0*DCTSTRIDE]= (d01 + d11)>>3; | |
1148 data[0+1*DCTSTRIDE]= (d00 - d10)>>3; | |
1149 data[1+1*DCTSTRIDE]= (d01 - d11)>>3; | |
1150 } | |
2256 | 1151 |
2259 | 1152 void j_rev_dct1(DCTBLOCK data){ |
1153 data[0] = (data[0] + 4)>>3; | |
1154 } | |
1155 | |
440
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1156 #undef FIX |
000aeeac27a2
* started to cleanup name clashes for onetime compilation
kabi
parents:
36
diff
changeset
|
1157 #undef CONST_BITS |