Mercurial > libavcodec.hg
annotate faanidct.c @ 11864:7204cb7dd601 libavcodec
Quant changes only once per MB so move the corresponding scale factor assignment
out of the block decoding loop. Indeo4 doesn't use any scale table but the quant
level itself as scale. Therefore access scale table only if its pointer != NULL.
author | maxim |
---|---|
date | Thu, 10 Jun 2010 17:31:12 +0000 |
parents | b72f6c4cee12 |
children |
rev | line source |
---|---|
6407 | 1 /* |
2 * Floating point AAN IDCT | |
3 * Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
6421 | 21 #include "faanidct.h" |
6407 | 22 |
/* Working floating point type of the transform; redefine to switch to double. */
#define FLOAT float

/* B0 is plain 1; Bk for k >= 1 is cos(pi*k/16)*sqrt(2). */
#define B0 1.0000000000000000000000
#define B1 1.3870398453221474618216 // cos(pi*1/16)sqrt(2)
#define B2 1.3065629648763765278566 // cos(pi*2/16)sqrt(2)
#define B3 1.1758756024193587169745 // cos(pi*3/16)sqrt(2)
#define B4 1.0000000000000000000000 // cos(pi*4/16)sqrt(2)
#define B5 0.7856949583871021812779 // cos(pi*5/16)sqrt(2)
#define B6 0.5411961001461969843997 // cos(pi*6/16)sqrt(2)
#define B7 0.2758993792829430123360 // cos(pi*7/16)sqrt(2)

#define A4 0.70710678118654752438 // cos(pi*4/16)
#define A2 0.92387953251128675613 // cos(pi*2/16)

/* Expands to the eight entries Br*B0/8 .. Br*B7/8 of one table row. */
#define PRESCALE_ROW(Br) \
    Br*B0/8, Br*B1/8, Br*B2/8, Br*B3/8, Br*B4/8, Br*B5/8, Br*B6/8, Br*B7/8

/*
 * Per-coefficient scale factors: prescale[8*r + c] == Br*Bc/8.
 * Each input coefficient is multiplied by its entry before the
 * butterfly passes run.
 */
static const FLOAT prescale[64]={
    PRESCALE_ROW(B0),
    PRESCALE_ROW(B1),
    PRESCALE_ROW(B2),
    PRESCALE_ROW(B3),
    PRESCALE_ROW(B4),
    PRESCALE_ROW(B5),
    PRESCALE_ROW(B6),
    PRESCALE_ROW(B7),
};

#undef PRESCALE_ROW
48 | |
49 static inline void p8idct(DCTELEM data[64], FLOAT temp[64], uint8_t *dest, int stride, int x, int y, int type){ | |
50 int i; | |
6471 | 51 FLOAT av_unused tmp0; |
6407 | 52 FLOAT s04, d04, s17, d17, s26, d26, s53, d53; |
53 FLOAT os07, os16, os25, os34; | |
54 FLOAT od07, od16, od25, od34; | |
55 | |
56 for(i=0; i<y*8; i+=y){ | |
57 s17= temp[1*x + i] + temp[7*x + i]; | |
58 d17= temp[1*x + i] - temp[7*x + i]; | |
59 s53= temp[5*x + i] + temp[3*x + i]; | |
60 d53= temp[5*x + i] - temp[3*x + i]; | |
61 | |
62 od07= s17 + s53; | |
63 od25= (s17 - s53)*(2*A4); | |
64 | |
65 #if 0 //these 2 are equivalent | |
66 tmp0= (d17 + d53)*(2*A2); | |
67 od34= d17*( 2*B6) - tmp0; | |
68 od16= d53*(-2*B2) + tmp0; | |
69 #else | |
70 od34= d17*(2*(B6-A2)) - d53*(2*A2); | |
71 od16= d53*(2*(A2-B2)) + d17*(2*A2); | |
72 #endif | |
73 | |
74 od16 -= od07; | |
75 od25 -= od16; | |
76 od34 += od25; | |
77 | |
78 s26 = temp[2*x + i] + temp[6*x + i]; | |
79 d26 = temp[2*x + i] - temp[6*x + i]; | |
6415
2eb1b18d6282
Remove another temporary variable with which gcc has problems.
michael
parents:
6407
diff
changeset
|
80 d26*= 2*A4; |
2eb1b18d6282
Remove another temporary variable with which gcc has problems.
michael
parents:
6407
diff
changeset
|
81 d26-= s26; |
6407 | 82 |
83 s04= temp[0*x + i] + temp[4*x + i]; | |
84 d04= temp[0*x + i] - temp[4*x + i]; | |
85 | |
86 os07= s04 + s26; | |
87 os34= s04 - s26; | |
6415
2eb1b18d6282
Remove another temporary variable with which gcc has problems.
michael
parents:
6407
diff
changeset
|
88 os16= d04 + d26; |
2eb1b18d6282
Remove another temporary variable with which gcc has problems.
michael
parents:
6407
diff
changeset
|
89 os25= d04 - d26; |
6407 | 90 |
91 if(type==0){ | |
92 temp[0*x + i]= os07 + od07; | |
93 temp[7*x + i]= os07 - od07; | |
94 temp[1*x + i]= os16 + od16; | |
95 temp[6*x + i]= os16 - od16; | |
96 temp[2*x + i]= os25 + od25; | |
97 temp[5*x + i]= os25 - od25; | |
98 temp[3*x + i]= os34 - od34; | |
99 temp[4*x + i]= os34 + od34; | |
100 }else if(type==1){ | |
101 data[0*x + i]= lrintf(os07 + od07); | |
102 data[7*x + i]= lrintf(os07 - od07); | |
103 data[1*x + i]= lrintf(os16 + od16); | |
104 data[6*x + i]= lrintf(os16 - od16); | |
105 data[2*x + i]= lrintf(os25 + od25); | |
106 data[5*x + i]= lrintf(os25 - od25); | |
107 data[3*x + i]= lrintf(os34 - od34); | |
108 data[4*x + i]= lrintf(os34 + od34); | |
109 }else if(type==2){ | |
110 dest[0*stride + i]= av_clip_uint8(((int)dest[0*stride + i]) + lrintf(os07 + od07)); | |
111 dest[7*stride + i]= av_clip_uint8(((int)dest[7*stride + i]) + lrintf(os07 - od07)); | |
112 dest[1*stride + i]= av_clip_uint8(((int)dest[1*stride + i]) + lrintf(os16 + od16)); | |
113 dest[6*stride + i]= av_clip_uint8(((int)dest[6*stride + i]) + lrintf(os16 - od16)); | |
114 dest[2*stride + i]= av_clip_uint8(((int)dest[2*stride + i]) + lrintf(os25 + od25)); | |
115 dest[5*stride + i]= av_clip_uint8(((int)dest[5*stride + i]) + lrintf(os25 - od25)); | |
116 dest[3*stride + i]= av_clip_uint8(((int)dest[3*stride + i]) + lrintf(os34 - od34)); | |
117 dest[4*stride + i]= av_clip_uint8(((int)dest[4*stride + i]) + lrintf(os34 + od34)); | |
118 }else{ | |
119 dest[0*stride + i]= av_clip_uint8(lrintf(os07 + od07)); | |
120 dest[7*stride + i]= av_clip_uint8(lrintf(os07 - od07)); | |
121 dest[1*stride + i]= av_clip_uint8(lrintf(os16 + od16)); | |
122 dest[6*stride + i]= av_clip_uint8(lrintf(os16 - od16)); | |
123 dest[2*stride + i]= av_clip_uint8(lrintf(os25 + od25)); | |
124 dest[5*stride + i]= av_clip_uint8(lrintf(os25 - od25)); | |
125 dest[3*stride + i]= av_clip_uint8(lrintf(os34 - od34)); | |
126 dest[4*stride + i]= av_clip_uint8(lrintf(os34 + od34)); | |
127 } | |
128 } | |
129 } | |
130 | |
131 void ff_faanidct(DCTELEM block[64]){ | |
132 FLOAT temp[64]; | |
133 int i; | |
134 | |
135 emms_c(); | |
136 | |
137 for(i=0; i<64; i++) | |
138 temp[i] = block[i] * prescale[i]; | |
139 | |
140 p8idct(block, temp, NULL, 0, 1, 8, 0); | |
141 p8idct(block, temp, NULL, 0, 8, 1, 1); | |
142 } | |
143 | |
144 void ff_faanidct_add(uint8_t *dest, int line_size, DCTELEM block[64]){ | |
145 FLOAT temp[64]; | |
146 int i; | |
147 | |
148 emms_c(); | |
149 | |
150 for(i=0; i<64; i++) | |
151 temp[i] = block[i] * prescale[i]; | |
152 | |
153 p8idct(block, temp, NULL, 0, 1, 8, 0); | |
154 p8idct(NULL , temp, dest, line_size, 8, 1, 2); | |
155 } | |
156 | |
157 void ff_faanidct_put(uint8_t *dest, int line_size, DCTELEM block[64]){ | |
158 FLOAT temp[64]; | |
159 int i; | |
160 | |
161 emms_c(); | |
162 | |
163 for(i=0; i<64; i++) | |
164 temp[i] = block[i] * prescale[i]; | |
165 | |
166 p8idct(block, temp, NULL, 0, 1, 8, 0); | |
167 p8idct(NULL , temp, dest, line_size, 8, 1, 3); | |
168 } |