Mercurial > libavcodec.hg
annotate jfdctint.c @ 3990:746a60ba3177 libavcodec
Enable CMOV_IS_FAST, as it is faster or of equal speed on every CPU (Duron, Athlon, PM, P3) for which I have seen benchmarks; it might be slower on P4, but no one has posted benchmarks ...
author | michael |
---|---|
date | Wed, 11 Oct 2006 12:23:40 +0000 |
parents | 9b98e18a1b1c |
children | d6f83e2f8804 |
rev | line source |
---|---|
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
1 /* |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
2 * jfdctint.c |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
3 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
4 * This file is part of the Independent JPEG Group's software. |
3669
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
5 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
6 * The authors make NO WARRANTY or representation, either express or implied, |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
7 * with respect to this software, its quality, accuracy, merchantability, or |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
8 * fitness for a particular purpose. This software is provided "AS IS", and |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
9 * you, its user, assume the entire risk as to its quality and accuracy. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
10 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
11 * This software is copyright (C) 1991-1996, Thomas G. Lane. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
12 * All Rights Reserved except as specified below. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
13 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
14 * Permission is hereby granted to use, copy, modify, and distribute this |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
15 * software (or portions thereof) for any purpose, without fee, subject to |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
16 * these conditions: |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
17 * (1) If any part of the source code for this software is distributed, then |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
18 * this README file must be included, with this copyright and no-warranty |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
19 * notice unaltered; and any additions, deletions, or changes to the original |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
20 * files must be clearly indicated in accompanying documentation. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
21 * (2) If only executable code is distributed, then the accompanying |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
22 * documentation must state that "this software is based in part on the work |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
23 * of the Independent JPEG Group". |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
24 * (3) Permission for use of this software is granted only if the user accepts |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
25 * full responsibility for any undesirable consequences; the authors accept |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
26 * NO LIABILITY for damages of any kind. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
27 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
28 * These conditions apply to any software derived from or based on the IJG |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
29 * code, not just to the unmodified library. If you use our work, you ought |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
30 * to acknowledge us. |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
31 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
32 * Permission is NOT granted for the use of any IJG author's name or company |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
33 * name in advertising or publicity relating to this software or products |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
34 * derived from it. This software may be referred to only as "the Independent |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
35 * JPEG Group's software". |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
36 * |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
37 * We specifically permit and encourage the use of this software as the basis |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
38 * of commercial products, provided that all warranty or liability claims are |
9b98e18a1b1c
Add copyright notice from the Independent JPEG Group instead of referring
diego
parents:
2979
diff
changeset
|
39 * assumed by the product vendor. |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
40 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
41 * This file contains a slow-but-accurate integer implementation of the |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
42 * forward DCT (Discrete Cosine Transform). |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
43 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
44 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
45 * on each column. Direct algorithms are also available, but they are |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
46 * much more complex and seem not to be any faster when reduced to code. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
47 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
48 * This implementation is based on an algorithm described in |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
49 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
50 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
51 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
52 * The primary algorithm described there uses 11 multiplies and 29 adds. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
53 * We use their alternate method with 12 multiplies and 32 adds. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
54 * The advantage of this method is that no data path contains more than one |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
55 * multiplication; this allows a very simple and accurate implementation in |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
56 * scaled fixed-point arithmetic, with a minimal number of shifts. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
57 */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
58 |
1106 | 59 /** |
60 * @file jfdctint.c | |
61 * Independent JPEG Group's slow & accurate DCT. | |
62 */ | |
2967 | 63 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
64 #include <stdlib.h> |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
65 #include <stdio.h> |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
66 #include "common.h" |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
67 #include "dsputil.h" |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
68 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
69 #define SHIFT_TEMPS |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
70 #define DCTSIZE 8 |
637 | 71 #define BITS_IN_JSAMPLE 8 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
72 #define GLOBAL(x) x |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
73 #define RIGHT_SHIFT(x, n) ((x) >> (n)) |
637 | 74 #define MULTIPLY16C16(var,const) ((var)*(const)) |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
75 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
76 #if 1 //def USE_ACCURATE_ROUNDING |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
77 #define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
78 #else |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
79 #define DESCALE(x,n) RIGHT_SHIFT(x, n) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
80 #endif |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
81 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
82 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
83 /* |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
84 * This module is specialized to the case DCTSIZE = 8. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
85 */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
86 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
87 #if DCTSIZE != 8 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
88 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
89 #endif |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
90 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
91 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
92 /* |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
93 * The poop on this scaling stuff is as follows: |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
94 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
95 * Each 1-D DCT step produces outputs which are a factor of sqrt(N) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
96 * larger than the true DCT outputs. The final outputs are therefore |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
97 * a factor of N larger than desired; since N=8 this can be cured by |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
98 * a simple right shift at the end of the algorithm. The advantage of |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
99 * this arrangement is that we save two multiplications per 1-D DCT, |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
100 * because the y0 and y4 outputs need not be divided by sqrt(N). |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
101 * In the IJG code, this factor of 8 is removed by the quantization step |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
102 * (in jcdctmgr.c), NOT in this module. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
103 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
104 * We have to do addition and subtraction of the integer inputs, which |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
105 * is no problem, and multiplication by fractional constants, which is |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
106 * a problem to do in integer arithmetic. We multiply all the constants |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
107 * by CONST_SCALE and convert them to integer constants (thus retaining |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
108 * CONST_BITS bits of precision in the constants). After doing a |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
109 * multiplication we have to divide the product by CONST_SCALE, with proper |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
110 * rounding, to produce the correct output. This division can be done |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
111 * cheaply as a right shift of CONST_BITS bits. We postpone shifting |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
112 * as long as possible so that partial sums can be added together with |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
113 * full fractional precision. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
114 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
115 * The outputs of the first pass are scaled up by PASS1_BITS bits so that |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
116 * they are represented to better-than-integral precision. These outputs |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
117 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
118 * with the recommended scaling. (For 12-bit sample data, the intermediate |
1064 | 119 * array is int32_t anyway.) |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
120 * |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
121 * To avoid overflow of the 32-bit intermediate results in pass 2, we must |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
122 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
123 * shows that the values given below are the most effective. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
124 */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
125 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
126 #if BITS_IN_JSAMPLE == 8 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
127 #define CONST_BITS 13 |
2979 | 128 #define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
129 #else |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
130 #define CONST_BITS 13 |
2979 | 131 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
132 #endif |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
133 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
134 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
135 * causing a lot of useless floating-point operations at run time. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
136 * To get around this we use the following pre-calculated constants. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
137 * If you change CONST_BITS you may want to add appropriate values. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
138 * (With a reasonable C compiler, you can just rely on the FIX() macro...) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
139 */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
140 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
141 #if CONST_BITS == 13 |
2979 | 142 #define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ |
143 #define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ | |
144 #define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ | |
145 #define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ | |
146 #define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ | |
147 #define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ | |
148 #define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ | |
149 #define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ | |
150 #define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ | |
151 #define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ | |
152 #define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ | |
153 #define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ | |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
154 #else |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
155 #define FIX_0_298631336 FIX(0.298631336) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
156 #define FIX_0_390180644 FIX(0.390180644) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
157 #define FIX_0_541196100 FIX(0.541196100) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
158 #define FIX_0_765366865 FIX(0.765366865) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
159 #define FIX_0_899976223 FIX(0.899976223) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
160 #define FIX_1_175875602 FIX(1.175875602) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
161 #define FIX_1_501321110 FIX(1.501321110) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
162 #define FIX_1_847759065 FIX(1.847759065) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
163 #define FIX_1_961570560 FIX(1.961570560) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
164 #define FIX_2_053119869 FIX(2.053119869) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
165 #define FIX_2_562915447 FIX(2.562915447) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
166 #define FIX_3_072711026 FIX(3.072711026) |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
167 #endif |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
168 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
169 |
/*
 * MULTIPLY(var, coef): product of an int32_t variable and an int32_t
 * constant, giving an int32_t result.
 *
 * With 8-bit samples and the recommended scaling, neither the variables nor
 * the constants involved are wider than 16 bits, so a 16x16->32 bit multiply
 * (MULTIPLY16C16) can replace a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication is needed.
 */

#if BITS_IN_JSAMPLE != 8 || CONST_BITS > 13 || PASS1_BITS > 2
#define MULTIPLY(var,coef)  ((var) * (coef))
#else
#define MULTIPLY(var,coef)  MULTIPLY16C16(var,coef)
#endif
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
182 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
183 |
1589 | 184 static always_inline void row_fdct(DCTELEM * data){ |
185 int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |
186 int_fast32_t tmp10, tmp11, tmp12, tmp13; | |
187 int_fast32_t z1, z2, z3, z4, z5; | |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
188 DCTELEM *dataptr; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
189 int ctr; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
190 SHIFT_TEMPS |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
191 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
192 /* Pass 1: process rows. */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
193 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
194 /* furthermore, we scale the results by 2**PASS1_BITS. */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
195 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
196 dataptr = data; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
197 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
198 tmp0 = dataptr[0] + dataptr[7]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
199 tmp7 = dataptr[0] - dataptr[7]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
200 tmp1 = dataptr[1] + dataptr[6]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
201 tmp6 = dataptr[1] - dataptr[6]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
202 tmp2 = dataptr[2] + dataptr[5]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
203 tmp5 = dataptr[2] - dataptr[5]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
204 tmp3 = dataptr[3] + dataptr[4]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
205 tmp4 = dataptr[3] - dataptr[4]; |
2967 | 206 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
207 /* Even part per LL&M figure 1 --- note that published figure is faulty; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
208 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
209 */ |
2967 | 210 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
211 tmp10 = tmp0 + tmp3; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
212 tmp13 = tmp0 - tmp3; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
213 tmp11 = tmp1 + tmp2; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
214 tmp12 = tmp1 - tmp2; |
2967 | 215 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
216 dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
217 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); |
2967 | 218 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
219 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
220 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), |
2979 | 221 CONST_BITS-PASS1_BITS); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
222 dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), |
2979 | 223 CONST_BITS-PASS1_BITS); |
2967 | 224 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
225 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
226 * cK represents cos(K*pi/16). |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
227 * i0..i3 in the paper are tmp4..tmp7 here. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
228 */ |
2967 | 229 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
230 z1 = tmp4 + tmp7; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
231 z2 = tmp5 + tmp6; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
232 z3 = tmp4 + tmp6; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
233 z4 = tmp5 + tmp7; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
234 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ |
2967 | 235 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
236 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
237 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
238 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
239 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
240 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
241 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
242 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
243 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
2967 | 244 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
245 z3 += z5; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
246 z4 += z5; |
2967 | 247 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
248 dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
249 dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
250 dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
251 dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); |
2967 | 252 |
2979 | 253 dataptr += DCTSIZE; /* advance pointer to next row */ |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
254 } |
1589 | 255 } |
256 | |
257 /* | |
258 * Perform the forward DCT on one block of samples. | |
259 */ | |
260 | |
261 GLOBAL(void) | |
262 ff_jpeg_fdct_islow (DCTELEM * data) | |
263 { | |
264 int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |
265 int_fast32_t tmp10, tmp11, tmp12, tmp13; | |
266 int_fast32_t z1, z2, z3, z4, z5; | |
267 DCTELEM *dataptr; | |
268 int ctr; | |
269 SHIFT_TEMPS | |
270 | |
271 row_fdct(data); | |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
272 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
273 /* Pass 2: process columns. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
274 * We remove the PASS1_BITS scaling, but leave the results scaled up |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
275 * by an overall factor of 8. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
276 */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
277 |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
278 dataptr = data; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
279 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
280 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
281 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
282 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
283 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
284 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
285 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
286 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
287 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; |
2967 | 288 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
289 /* Even part per LL&M figure 1 --- note that published figure is faulty; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
290 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
291 */ |
2967 | 292 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
293 tmp10 = tmp0 + tmp3; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
294 tmp13 = tmp0 - tmp3; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
295 tmp11 = tmp1 + tmp2; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
296 tmp12 = tmp1 - tmp2; |
2967 | 297 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
298 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
299 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); |
2967 | 300 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
301 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
302 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), |
2979 | 303 CONST_BITS+PASS1_BITS); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
304 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), |
2979 | 305 CONST_BITS+PASS1_BITS); |
2967 | 306 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
307 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
308 * cK represents cos(K*pi/16). |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
309 * i0..i3 in the paper are tmp4..tmp7 here. |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
310 */ |
2967 | 311 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
312 z1 = tmp4 + tmp7; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
313 z2 = tmp5 + tmp6; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
314 z3 = tmp4 + tmp6; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
315 z4 = tmp5 + tmp7; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
316 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ |
2967 | 317 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
318 tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
319 tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
320 tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
321 tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
322 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
323 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
324 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
325 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ |
2967 | 326 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
327 z3 += z5; |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
328 z4 += z5; |
2967 | 329 |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
330 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, |
2979 | 331 CONST_BITS+PASS1_BITS); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
332 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, |
2979 | 333 CONST_BITS+PASS1_BITS); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
334 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, |
2979 | 335 CONST_BITS+PASS1_BITS); |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
336 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, |
2979 | 337 CONST_BITS+PASS1_BITS); |
2967 | 338 |
2979 | 339 dataptr++; /* advance pointer to next column */ |
625
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
340 } |
bb6a69f9d409
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents:
diff
changeset
|
341 } |
1567 | 342 |
343 /* | |
344 * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT | |
345 * on the rows and then, instead of doing the even and odd parts on the columns, | |
346 * you do the even part twice. | |
347 */ | |
348 GLOBAL(void) | |
349 ff_fdct248_islow (DCTELEM * data) | |
350 { | |
1589 | 351 int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
352 int_fast32_t tmp10, tmp11, tmp12, tmp13; | |
353 int_fast32_t z1; | |
1567 | 354 DCTELEM *dataptr; |
355 int ctr; | |
356 SHIFT_TEMPS | |
357 | |
1589 | 358 row_fdct(data); |
1567 | 359 |
360 /* Pass 2: process columns. | |
361 * We remove the PASS1_BITS scaling, but leave the results scaled up | |
362 * by an overall factor of 8. | |
363 */ | |
364 | |
365 dataptr = data; | |
366 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | |
367 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; | |
368 tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; | |
369 tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; | |
370 tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; | |
371 tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; | |
372 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; | |
373 tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; | |
374 tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; | |
2967 | 375 |
1567 | 376 tmp10 = tmp0 + tmp3; |
377 tmp11 = tmp1 + tmp2; | |
378 tmp12 = tmp1 - tmp2; | |
379 tmp13 = tmp0 - tmp3; | |
2967 | 380 |
1567 | 381 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); |
382 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); | |
2967 | 383 |
1567 | 384 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); |
385 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |
2979 | 386 CONST_BITS+PASS1_BITS); |
1567 | 387 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), |
2979 | 388 CONST_BITS+PASS1_BITS); |
1567 | 389 |
390 tmp10 = tmp4 + tmp7; | |
391 tmp11 = tmp5 + tmp6; | |
392 tmp12 = tmp5 - tmp6; | |
393 tmp13 = tmp4 - tmp7; | |
394 | |
395 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); | |
396 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); | |
2967 | 397 |
1567 | 398 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); |
399 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), | |
2979 | 400 CONST_BITS+PASS1_BITS); |
1567 | 401 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), |
2979 | 402 CONST_BITS+PASS1_BITS); |
2967 | 403 |
2979 | 404 dataptr++; /* advance pointer to next column */ |
1567 | 405 } |
406 } |