Mercurial > libavcodec.hg
annotate faandct.c @ 10893:2aafcafbe1f0 libavcodec
Replace cabac checks in inline functions from h264.h with constants.
No benchmark because it's just replacing variables with literal constants
(so no risk of slowdown outside gcc silliness) and I need sleep.
author | michael |
---|---|
date | Sat, 16 Jan 2010 05:41:33 +0000 |
parents | e9d9d946f213 |
children | 7dd2a45249a9 |
rev | line source |
---|---|
1557 | 1 /* |
2 * Floating point AAN DCT | |
5214 | 3 * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c) |
4 * | |
7821 | 5 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
6 * Copyright (c) 2003 Roman Shaposhnik | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * |
7821 | 8 * Permission to use, copy, modify, and/or distribute this software for any |
9 * purpose with or without fee is hereby granted, provided that the above | |
10 * copyright notice and this permission notice appear in all copies. | |
1557 | 11 * |
7821 | 12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
1557 | 19 */ |
20 | |
21 /** | |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
7821
diff
changeset
|
22 * @file libavcodec/faandct.c |
2967 | 23 * @brief |
1557 | 24 * Floating point AAN DCT |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
26 */ | |
27 | |
28 #include "dsputil.h" | |
29 #include "faandct.h" | |
30 | |
#define FLOAT float

/*
 * SCALE(x) is the factor applied to output coefficient x just before rounding.
 * With FAAN_POSTSCALE defined it is read from postscale[]; otherwise it is 1
 * (presumably because the scaling has been merged into the quantization
 * table by the caller -- confirm against the users of this DCT).
 */
#ifdef FAAN_POSTSCALE
#    define SCALE(x) postscale[x]
#else
#    define SCALE(x) 1
#endif

/*
 * The B* constants were generated by simple C code (they are not as accurate
 * as they could be):
 *
 *   for (i = 0; i < 8; i++) {
 *       printf("#define B%d %1.20Lf\n", i,
 *              (long double)1.0 / (cosl(i * acosl(-1.0) / (long double)16.0) * sqrtl(2)));
 *   }
 *
 * B0 is defined as 1 below, not as the value that loop prints for i = 0.
 */
#define B0 1.00000000000000000000
#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1


#define A1 0.70710678118654752438 // cos(pi*4/16)
#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
#define A5 0.38268343236508977170 // cos(pi*6/16)
#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)

/* One row of the table: a row factor multiplied by each column factor. */
#define POSTSCALE_ROW(b) \
    (b)*B0, (b)*B1, (b)*B2, (b)*B3, (b)*B4, (b)*B5, (b)*B6, (b)*B7

/* postscale[8*r + c] == Br * Bc, used through SCALE() when FAAN_POSTSCALE is set. */
static const FLOAT postscale[64] = {
    POSTSCALE_ROW(B0),
    POSTSCALE_ROW(B1),
    POSTSCALE_ROW(B2),
    POSTSCALE_ROW(B3),
    POSTSCALE_ROW(B4),
    POSTSCALE_ROW(B5),
    POSTSCALE_ROW(B6),
    POSTSCALE_ROW(B7),
};

#undef POSTSCALE_ROW
69 | |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
3947
diff
changeset
|
70 static av_always_inline void row_fdct(FLOAT temp[64], DCTELEM * data) |
1557 | 71 { |
72 FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |
73 FLOAT tmp10, tmp11, tmp12, tmp13; | |
6471 | 74 FLOAT z2, z4, z11, z13; |
75 FLOAT av_unused z5; | |
1557 | 76 int i; |
77 | |
78 for (i=0; i<8*8; i+=8) { | |
79 tmp0= data[0 + i] + data[7 + i]; | |
80 tmp7= data[0 + i] - data[7 + i]; | |
81 tmp1= data[1 + i] + data[6 + i]; | |
82 tmp6= data[1 + i] - data[6 + i]; | |
83 tmp2= data[2 + i] + data[5 + i]; | |
84 tmp5= data[2 + i] - data[5 + i]; | |
85 tmp3= data[3 + i] + data[4 + i]; | |
86 tmp4= data[3 + i] - data[4 + i]; | |
2967 | 87 |
1557 | 88 tmp10= tmp0 + tmp3; |
89 tmp13= tmp0 - tmp3; | |
90 tmp11= tmp1 + tmp2; | |
91 tmp12= tmp1 - tmp2; | |
2967 | 92 |
1557 | 93 temp[0 + i]= tmp10 + tmp11; |
94 temp[4 + i]= tmp10 - tmp11; | |
2967 | 95 |
6408
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
96 tmp12 += tmp13; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
97 tmp12 *= A1; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
98 temp[2 + i]= tmp13 + tmp12; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
99 temp[6 + i]= tmp13 - tmp12; |
2967 | 100 |
6409
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
101 tmp4 += tmp5; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
102 tmp5 += tmp6; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
103 tmp6 += tmp7; |
1557 | 104 |
6411
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
105 #if 0 |
6409
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
106 z5= (tmp4 - tmp6) * A5; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
107 z2= tmp4*A2 + z5; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
108 z4= tmp6*A4 + z5; |
6411
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
109 #else |
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
110 z2= tmp4*(A2+A5) - tmp6*A5; |
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
111 z4= tmp6*(A4-A5) + tmp4*A5; |
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
112 #endif |
6410 | 113 tmp5*=A1; |
1557 | 114 |
6410 | 115 z11= tmp7 + tmp5; |
116 z13= tmp7 - tmp5; | |
1557 | 117 |
118 temp[5 + i]= z13 + z2; | |
119 temp[3 + i]= z13 - z2; | |
120 temp[1 + i]= z11 + z4; | |
121 temp[7 + i]= z11 - z4; | |
2967 | 122 } |
1589 | 123 } |
124 | |
125 void ff_faandct(DCTELEM * data) | |
126 { | |
127 FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | |
128 FLOAT tmp10, tmp11, tmp12, tmp13; | |
6471 | 129 FLOAT z2, z4, z11, z13; |
130 FLOAT av_unused z5; | |
1589 | 131 FLOAT temp[64]; |
132 int i; | |
133 | |
134 emms_c(); | |
135 | |
136 row_fdct(temp, data); | |
1557 | 137 |
138 for (i=0; i<8; i++) { | |
139 tmp0= temp[8*0 + i] + temp[8*7 + i]; | |
140 tmp7= temp[8*0 + i] - temp[8*7 + i]; | |
141 tmp1= temp[8*1 + i] + temp[8*6 + i]; | |
142 tmp6= temp[8*1 + i] - temp[8*6 + i]; | |
143 tmp2= temp[8*2 + i] + temp[8*5 + i]; | |
144 tmp5= temp[8*2 + i] - temp[8*5 + i]; | |
145 tmp3= temp[8*3 + i] + temp[8*4 + i]; | |
146 tmp4= temp[8*3 + i] - temp[8*4 + i]; | |
2967 | 147 |
1557 | 148 tmp10= tmp0 + tmp3; |
149 tmp13= tmp0 - tmp3; | |
150 tmp11= tmp1 + tmp2; | |
151 tmp12= tmp1 - tmp2; | |
2967 | 152 |
1563
820e06c6ca9b
4.9-RC FreeBSD doesnt like lrint() so change to lrintf(), not that bsd supports that but we emulate it ...
michael
parents:
1562
diff
changeset
|
153 data[8*0 + i]= lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); |
820e06c6ca9b
4.9-RC FreeBSD doesnt like lrint() so change to lrintf(), not that bsd supports that but we emulate it ...
michael
parents:
1562
diff
changeset
|
154 data[8*4 + i]= lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); |
2967 | 155 |
6408
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
156 tmp12 += tmp13; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
157 tmp12 *= A1; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
158 data[8*2 + i]= lrintf(SCALE(8*2 + i) * (tmp13 + tmp12)); |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
159 data[8*6 + i]= lrintf(SCALE(8*6 + i) * (tmp13 - tmp12)); |
2967 | 160 |
6409
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
161 tmp4 += tmp5; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
162 tmp5 += tmp6; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
163 tmp6 += tmp7; |
1557 | 164 |
6411
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
165 #if 0 |
6409
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
166 z5= (tmp4 - tmp6) * A5; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
167 z2= tmp4*A2 + z5; |
813ff53ed933
Avoid a=b+c but rather use a+=b (gcc is too stupid to do this itself),
michael
parents:
6408
diff
changeset
|
168 z4= tmp6*A4 + z5; |
6411
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
169 #else |
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
170 z2= tmp4*(A2+A5) - tmp6*A5; |
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
171 z4= tmp6*(A4-A5) + tmp4*A5; |
5d50e8a7eb09
Alternative and faster (on gcc x86) plane rotation.
michael
parents:
6410
diff
changeset
|
172 #endif |
6410 | 173 tmp5*=A1; |
1557 | 174 |
6410 | 175 z11= tmp7 + tmp5; |
176 z13= tmp7 - tmp5; | |
1557 | 177 |
1563
820e06c6ca9b
4.9-RC FreeBSD doesnt like lrint() so change to lrintf(), not that bsd supports that but we emulate it ...
michael
parents:
1562
diff
changeset
|
178 data[8*5 + i]= lrintf(SCALE(8*5 + i) * (z13 + z2)); |
820e06c6ca9b
4.9-RC FreeBSD doesnt like lrint() so change to lrintf(), not that bsd supports that but we emulate it ...
michael
parents:
1562
diff
changeset
|
179 data[8*3 + i]= lrintf(SCALE(8*3 + i) * (z13 - z2)); |
820e06c6ca9b
4.9-RC FreeBSD doesnt like lrint() so change to lrintf(), not that bsd supports that but we emulate it ...
michael
parents:
1562
diff
changeset
|
180 data[8*1 + i]= lrintf(SCALE(8*1 + i) * (z11 + z4)); |
820e06c6ca9b
4.9-RC FreeBSD doesnt like lrint() so change to lrintf(), not that bsd supports that but we emulate it ...
michael
parents:
1562
diff
changeset
|
181 data[8*7 + i]= lrintf(SCALE(8*7 + i) * (z11 - z4)); |
1557 | 182 } |
183 } | |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
184 |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
185 void ff_faandct248(DCTELEM * data) |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
186 { |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
187 FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
188 FLOAT tmp10, tmp11, tmp12, tmp13; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
189 FLOAT temp[64]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
190 int i; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
191 |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
192 emms_c(); |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
193 |
1589 | 194 row_fdct(temp, data); |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
195 |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
196 for (i=0; i<8; i++) { |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
197 tmp0 = temp[8*0 + i] + temp[8*1 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
198 tmp1 = temp[8*2 + i] + temp[8*3 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
199 tmp2 = temp[8*4 + i] + temp[8*5 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
200 tmp3 = temp[8*6 + i] + temp[8*7 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
201 tmp4 = temp[8*0 + i] - temp[8*1 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
202 tmp5 = temp[8*2 + i] - temp[8*3 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
203 tmp6 = temp[8*4 + i] - temp[8*5 + i]; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
204 tmp7 = temp[8*6 + i] - temp[8*7 + i]; |
2967 | 205 |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
206 tmp10 = tmp0 + tmp3; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
207 tmp11 = tmp1 + tmp2; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
208 tmp12 = tmp1 - tmp2; |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
209 tmp13 = tmp0 - tmp3; |
2967 | 210 |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
211 data[8*0 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
212 data[8*4 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); |
2967 | 213 |
6408
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
214 tmp12 += tmp13; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
215 tmp12 *= A1; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
216 data[8*2 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + tmp12)); |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
217 data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - tmp12)); |
2967 | 218 |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
219 tmp10 = tmp4 + tmp7; |
2979 | 220 tmp11 = tmp5 + tmp6; |
221 tmp12 = tmp5 - tmp6; | |
222 tmp13 = tmp4 - tmp7; | |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
223 |
2979 | 224 data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); |
225 data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); | |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
226 |
6408
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
227 tmp12 += tmp13; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
228 tmp12 *= A1; |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
229 data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + tmp12)); |
7af599600f2d
One variable less. No real change to the actual code.
michael
parents:
6398
diff
changeset
|
230 data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - tmp12)); |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
231 } |
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1563
diff
changeset
|
232 } |