Mercurial > libavcodec.hg
comparison simple_idct.c @ 205:ccf36af385f3 libavcodec
(commit by michael)
faster simple_idct in C
author | arpi_esp |
---|---|
date | Mon, 14 Jan 2002 04:39:59 +0000 |
parents | bd77d3cbb233 |
children | 73df666cacc7 |
comparison
legend (row types in the table below): equal, deleted, inserted, replaced
204:fceb435fae6b | 205:ccf36af385f3 |
---|---|
43 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | 43 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
44 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | 44 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
45 #define ROW_SHIFT 11 | 45 #define ROW_SHIFT 11 |
46 #define COL_SHIFT 20 // 6 | 46 #define COL_SHIFT 20 // 6 |
47 #endif | 47 #endif |
48 #if 1 | 48 |
49 static void inline idctRow (int16_t * row) | 49 /* 8x8 Matrix used to do a trivial (slow) 8 point IDCT */ |
50 static int coeff[64]={ | |
51 W4, W4, W4, W4, W4, W4, W4, W4, | |
52 W1, W3, W5, W7,-W7,-W5,-W3,-W1, | |
53 W2, W6,-W6,-W2,-W2,-W6, W6, W2, | |
54 W3,-W7,-W1,-W5, W5, W1, W7,-W3, | |
55 W4,-W4,-W4, W4, W4,-W4,-W4, W4, | |
56 W5,-W1, W7, W3,-W3,-W7, W1,-W5, | |
57 W6,-W2, W2,-W6,-W6, W2,-W2, W6, | |
58 W7,-W5, W3,-W1, W1,-W3, W5,-W7 | |
59 }; | |
60 | |
61 static int inline idctRowCondZ (int16_t * row) | |
50 { | 62 { |
51 int a0, a1, a2, a3, b0, b1, b2, b3; | 63 int a0, a1, a2, a3, b0, b1, b2, b3; |
52 const int C1 =W1; | 64 |
53 const int C2 =W2; | 65 if( !( ((uint32_t*)row)[0]|((uint32_t*)row)[1] |((uint32_t*)row)[2] |((uint32_t*)row)[3])) { |
54 const int C3 =W3; | 66 /* row[0] = row[1] = row[2] = row[3] = row[4] = |
55 const int C4 =W4; | 67 row[5] = row[6] = row[7] = 0;*/ |
56 const int C5 =W5; | 68 return 0; |
57 const int C6 =W6; | 69 } |
58 const int C7 =W7; | 70 |
59 | 71 if(!( ((uint32_t*)row)[2] |((uint32_t*)row)[3] )){ |
60 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) { | 72 a0 = W4*row[0] + W2*row[2] + (1<<(ROW_SHIFT-1)); |
61 row[0] = row[1] = row[2] = row[3] = row[4] = | 73 a1 = W4*row[0] + W6*row[2] + (1<<(ROW_SHIFT-1)); |
62 row[5] = row[6] = row[7] = row[0]<<3; | 74 a2 = W4*row[0] - W6*row[2] + (1<<(ROW_SHIFT-1)); |
63 return; | 75 a3 = W4*row[0] - W2*row[2] + (1<<(ROW_SHIFT-1)); |
64 } | 76 |
65 | 77 b0 = W1*row[1] + W3*row[3]; |
66 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1)); | 78 b1 = W3*row[1] - W7*row[3]; |
67 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1)); | 79 b2 = W5*row[1] - W1*row[3]; |
68 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1)); | 80 b3 = W7*row[1] - W5*row[3]; |
69 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1)); | 81 }else{ |
70 | 82 a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + (1<<(ROW_SHIFT-1)); |
71 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7]; | 83 a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + (1<<(ROW_SHIFT-1)); |
72 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7]; | 84 a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + (1<<(ROW_SHIFT-1)); |
73 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7]; | 85 a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + (1<<(ROW_SHIFT-1)); |
74 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7]; | 86 |
87 b0 = W1*row[1] + W3*row[3] + W5*row[5] + W7*row[7]; | |
88 b1 = W3*row[1] - W7*row[3] - W1*row[5] - W5*row[7]; | |
89 b2 = W5*row[1] - W1*row[3] + W7*row[5] + W3*row[7]; | |
90 b3 = W7*row[1] - W5*row[3] + W3*row[5] - W1*row[7]; | |
91 } | |
75 | 92 |
76 row[0] = (a0 + b0) >> ROW_SHIFT; | 93 row[0] = (a0 + b0) >> ROW_SHIFT; |
77 row[1] = (a1 + b1) >> ROW_SHIFT; | 94 row[1] = (a1 + b1) >> ROW_SHIFT; |
78 row[2] = (a2 + b2) >> ROW_SHIFT; | 95 row[2] = (a2 + b2) >> ROW_SHIFT; |
79 row[3] = (a3 + b3) >> ROW_SHIFT; | 96 row[3] = (a3 + b3) >> ROW_SHIFT; |
80 row[4] = (a3 - b3) >> ROW_SHIFT; | 97 row[4] = (a3 - b3) >> ROW_SHIFT; |
81 row[5] = (a2 - b2) >> ROW_SHIFT; | 98 row[5] = (a2 - b2) >> ROW_SHIFT; |
82 row[6] = (a1 - b1) >> ROW_SHIFT; | 99 row[6] = (a1 - b1) >> ROW_SHIFT; |
83 row[7] = (a0 - b0) >> ROW_SHIFT; | 100 row[7] = (a0 - b0) >> ROW_SHIFT; |
101 | |
102 return 1; | |
103 } | |
104 | |
105 static int inline idctRowCondDC (int16_t * row) | |
106 { | |
107 int a0, a1, a2, a3, b0, b1, b2, b3; | |
108 | |
109 if( !( ((uint32_t*)row)[1] |((uint32_t*)row)[2] |((uint32_t*)row)[3]| row[1])) { | |
110 // row[0] = row[1] = row[2] = row[3] = row[4] = row[5] = row[6] = row[7] = row[0]<<3; | |
111 uint16_t temp= row[0]<<3; | |
112 ((uint32_t*)row)[0]=((uint32_t*)row)[1]= | |
113 ((uint32_t*)row)[2]=((uint32_t*)row)[3]= temp + (temp<<16); | |
114 return 0; | |
115 } | |
116 | |
117 if(!( ((uint32_t*)row)[2] |((uint32_t*)row)[3] )){ | |
118 a0 = W4*row[0] + W2*row[2] + (1<<(ROW_SHIFT-1)); | |
119 a1 = W4*row[0] + W6*row[2] + (1<<(ROW_SHIFT-1)); | |
120 a2 = W4*row[0] - W6*row[2] + (1<<(ROW_SHIFT-1)); | |
121 a3 = W4*row[0] - W2*row[2] + (1<<(ROW_SHIFT-1)); | |
122 | |
123 b0 = W1*row[1] + W3*row[3]; | |
124 b1 = W3*row[1] - W7*row[3]; | |
125 b2 = W5*row[1] - W1*row[3]; | |
126 b3 = W7*row[1] - W5*row[3]; | |
127 }else{ | |
128 a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + (1<<(ROW_SHIFT-1)); | |
129 a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + (1<<(ROW_SHIFT-1)); | |
130 a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + (1<<(ROW_SHIFT-1)); | |
131 a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + (1<<(ROW_SHIFT-1)); | |
132 | |
133 b0 = W1*row[1] + W3*row[3] + W5*row[5] + W7*row[7]; | |
134 b1 = W3*row[1] - W7*row[3] - W1*row[5] - W5*row[7]; | |
135 b2 = W5*row[1] - W1*row[3] + W7*row[5] + W3*row[7]; | |
136 b3 = W7*row[1] - W5*row[3] + W3*row[5] - W1*row[7]; | |
137 } | |
138 | |
139 row[0] = (a0 + b0) >> ROW_SHIFT; | |
140 row[7] = (a0 - b0) >> ROW_SHIFT; | |
141 row[1] = (a1 + b1) >> ROW_SHIFT; | |
142 row[6] = (a1 - b1) >> ROW_SHIFT; | |
143 row[2] = (a2 + b2) >> ROW_SHIFT; | |
144 row[5] = (a2 - b2) >> ROW_SHIFT; | |
145 row[3] = (a3 + b3) >> ROW_SHIFT; | |
146 row[4] = (a3 - b3) >> ROW_SHIFT; | |
147 | |
148 return 1; | |
84 } | 149 } |
85 | 150 |
86 static void inline idctCol (int16_t * col) | 151 static void inline idctCol (int16_t * col) |
87 { | 152 { |
88 int a0, a1, a2, a3, b0, b1, b2, b3; | 153 |
89 const int C1 =W1; | |
90 const int C2 =W2; | |
91 const int C3 =W3; | |
92 const int C4 =W4; | |
93 const int C5 =W5; | |
94 const int C6 =W6; | |
95 const int C7 =W7; | |
96 /* | 154 /* |
97 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) { | 155 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) { |
98 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] = | 156 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] = |
99 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3; | 157 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3; |
100 return; | 158 return; |
101 }*/ | 159 }*/ |
160 | |
161 int a0, a1, a2, a3, b0, b1, b2, b3; | |
102 col[0] += (1<<(COL_SHIFT-1))/W4; | 162 col[0] += (1<<(COL_SHIFT-1))/W4; |
103 a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6]; | 163 a0 = W4*col[8*0] + W2*col[8*2] + W4*col[8*4] + W6*col[8*6]; |
104 a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6]; | 164 a1 = W4*col[8*0] + W6*col[8*2] - W4*col[8*4] - W2*col[8*6]; |
105 a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6]; | 165 a2 = W4*col[8*0] - W6*col[8*2] - W4*col[8*4] + W2*col[8*6]; |
106 a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6]; | 166 a3 = W4*col[8*0] - W2*col[8*2] + W4*col[8*4] - W6*col[8*6]; |
107 | 167 |
108 b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7]; | 168 b0 = W1*col[8*1] + W3*col[8*3] + W5*col[8*5] + W7*col[8*7]; |
109 b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7]; | 169 b1 = W3*col[8*1] - W7*col[8*3] - W1*col[8*5] - W5*col[8*7]; |
110 b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7]; | 170 b2 = W5*col[8*1] - W1*col[8*3] + W7*col[8*5] + W3*col[8*7]; |
111 b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7]; | 171 b3 = W7*col[8*1] - W5*col[8*3] + W3*col[8*5] - W1*col[8*7]; |
112 | 172 |
113 col[8*0] = (a0 + b0) >> COL_SHIFT; | 173 col[8*0] = (a0 + b0) >> COL_SHIFT; |
174 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
114 col[8*1] = (a1 + b1) >> COL_SHIFT; | 175 col[8*1] = (a1 + b1) >> COL_SHIFT; |
176 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
115 col[8*2] = (a2 + b2) >> COL_SHIFT; | 177 col[8*2] = (a2 + b2) >> COL_SHIFT; |
178 col[8*5] = (a2 - b2) >> COL_SHIFT; | |
116 col[8*3] = (a3 + b3) >> COL_SHIFT; | 179 col[8*3] = (a3 + b3) >> COL_SHIFT; |
117 col[8*4] = (a3 - b3) >> COL_SHIFT; | 180 col[8*4] = (a3 - b3) >> COL_SHIFT; |
181 } | |
182 | |
183 static void inline idctSparseCol (int16_t * col) | |
184 { | |
185 int a0, a1, a2, a3, b0, b1, b2, b3; | |
186 col[0] += (1<<(COL_SHIFT-1))/W4; | |
187 a0 = W4*col[8*0]; | |
188 a1 = W4*col[8*0]; | |
189 a2 = W4*col[8*0]; | |
190 a3 = W4*col[8*0]; | |
191 | |
192 if(col[8*2]){ | |
193 a0 += + W2*col[8*2]; | |
194 a1 += + W6*col[8*2]; | |
195 a2 += - W6*col[8*2]; | |
196 a3 += - W2*col[8*2]; | |
197 } | |
198 | |
199 if(col[8*4]){ | |
200 a0 += + W4*col[8*4]; | |
201 a1 += - W4*col[8*4]; | |
202 a2 += - W4*col[8*4]; | |
203 a3 += + W4*col[8*4]; | |
204 } | |
205 | |
206 if(col[8*6]){ | |
207 a0 += + W6*col[8*6]; | |
208 a1 += - W2*col[8*6]; | |
209 a2 += + W2*col[8*6]; | |
210 a3 += - W6*col[8*6]; | |
211 } | |
212 | |
213 if(col[8*1]){ | |
214 b0 = W1*col[8*1]; | |
215 b1 = W3*col[8*1]; | |
216 b2 = W5*col[8*1]; | |
217 b3 = W7*col[8*1]; | |
218 }else{ | |
219 b0 = | |
220 b1 = | |
221 b2 = | |
222 b3 = 0; | |
223 } | |
224 | |
225 if(col[8*3]){ | |
226 b0 += + W3*col[8*3]; | |
227 b1 += - W7*col[8*3]; | |
228 b2 += - W1*col[8*3]; | |
229 b3 += - W5*col[8*3]; | |
230 } | |
231 | |
232 if(col[8*5]){ | |
233 b0 += + W5*col[8*5]; | |
234 b1 += - W1*col[8*5]; | |
235 b2 += + W7*col[8*5]; | |
236 b3 += + W3*col[8*5]; | |
237 } | |
238 | |
239 if(col[8*7]){ | |
240 b0 += + W7*col[8*7]; | |
241 b1 += - W5*col[8*7]; | |
242 b2 += + W3*col[8*7]; | |
243 b3 += - W1*col[8*7]; | |
244 } | |
245 | |
246 if(!(b0|b1|b2|b3)){ | |
247 col[8*0] = (a0) >> COL_SHIFT; | |
248 col[8*7] = (a0) >> COL_SHIFT; | |
249 col[8*1] = (a1) >> COL_SHIFT; | |
250 col[8*6] = (a1) >> COL_SHIFT; | |
251 col[8*2] = (a2) >> COL_SHIFT; | |
252 col[8*5] = (a2) >> COL_SHIFT; | |
253 col[8*3] = (a3) >> COL_SHIFT; | |
254 col[8*4] = (a3) >> COL_SHIFT; | |
255 }else{ | |
256 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
257 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
258 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
259 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
260 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
261 col[8*5] = (a2 - b2) >> COL_SHIFT; | |
262 col[8*3] = (a3 + b3) >> COL_SHIFT; | |
263 col[8*4] = (a3 - b3) >> COL_SHIFT; | |
264 } | |
265 } | |
266 | |
267 static void inline idctSparse2Col (int16_t * col) | |
268 { | |
269 int a0, a1, a2, a3, b0, b1, b2, b3; | |
270 col[0] += (1<<(COL_SHIFT-1))/W4; | |
271 a0 = W4*col[8*0]; | |
272 a1 = W4*col[8*0]; | |
273 a2 = W4*col[8*0]; | |
274 a3 = W4*col[8*0]; | |
275 | |
276 if(col[8*2]){ | |
277 a0 += + W2*col[8*2]; | |
278 a1 += + W6*col[8*2]; | |
279 a2 += - W6*col[8*2]; | |
280 a3 += - W2*col[8*2]; | |
281 } | |
282 | |
283 if(col[8*4]){ | |
284 a0 += + W4*col[8*4]; | |
285 a1 += - W4*col[8*4]; | |
286 a2 += - W4*col[8*4]; | |
287 a3 += + W4*col[8*4]; | |
288 } | |
289 | |
290 if(col[8*6]){ | |
291 a0 += + W6*col[8*6]; | |
292 a1 += - W2*col[8*6]; | |
293 a2 += + W2*col[8*6]; | |
294 a3 += - W6*col[8*6]; | |
295 } | |
296 | |
297 if(col[8*1] || 1){ | |
298 b0 = W1*col[8*1]; | |
299 b1 = W3*col[8*1]; | |
300 b2 = W5*col[8*1]; | |
301 b3 = W7*col[8*1]; | |
302 }else{ | |
303 b0 = | |
304 b1 = | |
305 b2 = | |
306 b3 = 0; | |
307 } | |
308 | |
309 if(col[8*3]){ | |
310 b0 += + W3*col[8*3]; | |
311 b1 += - W7*col[8*3]; | |
312 b2 += - W1*col[8*3]; | |
313 b3 += - W5*col[8*3]; | |
314 } | |
315 | |
316 if(col[8*5]){ | |
317 b0 += + W5*col[8*5]; | |
318 b1 += - W1*col[8*5]; | |
319 b2 += + W7*col[8*5]; | |
320 b3 += + W3*col[8*5]; | |
321 } | |
322 | |
323 if(col[8*7]){ | |
324 b0 += + W7*col[8*7]; | |
325 b1 += - W5*col[8*7]; | |
326 b2 += + W3*col[8*7]; | |
327 b3 += - W1*col[8*7]; | |
328 } | |
329 | |
330 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
331 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
332 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
333 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
334 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
118 col[8*5] = (a2 - b2) >> COL_SHIFT; | 335 col[8*5] = (a2 - b2) >> COL_SHIFT; |
119 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
120 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
121 } | |
122 | |
123 void simple_idct (short *block) | |
124 { | |
125 int i; | |
126 for(i=0; i<8; i++) | |
127 idctRow(block + 8*i); | |
128 | |
129 for(i=0; i<8; i++) | |
130 idctCol(block + i); | |
131 | |
132 } | |
133 | |
134 #else | |
135 | |
136 #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
137 #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
138 #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
139 #define W4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
140 #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
141 #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
142 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | |
143 #define COL_SHIFT 31 // 6 | |
144 | |
145 static void inline idctRow (int32_t *out, int16_t * row) | |
146 { | |
147 int a0, a1, a2, a3, b0, b1, b2, b3; | |
148 const int C1 =W1; | |
149 const int C2 =W2; | |
150 const int C3 =W3; | |
151 const int C4 =W4; | |
152 const int C5 =W5; | |
153 const int C6 =W6; | |
154 const int C7 =W7; | |
155 /* | |
156 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7])) { | |
157 row[0] = row[1] = row[2] = row[3] = row[4] = | |
158 row[5] = row[6] = row[7] = row[0]<<14; | |
159 return; | |
160 } | |
161 */ | |
162 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6]; | |
163 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6]; | |
164 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6]; | |
165 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6]; | |
166 | |
167 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7]; | |
168 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7]; | |
169 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7]; | |
170 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7]; | |
171 | |
172 out[0] = (a0 + b0); | |
173 out[1] = (a1 + b1); | |
174 out[2] = (a2 + b2); | |
175 out[3] = (a3 + b3); | |
176 out[4] = (a3 - b3); | |
177 out[5] = (a2 - b2); | |
178 out[6] = (a1 - b1); | |
179 out[7] = (a0 - b0); | |
180 } | |
181 | |
182 static void inline idctCol (int32_t *in, int16_t * col) | |
183 { | |
184 int64_t a0, a1, a2, a3, b0, b1, b2, b3; | |
185 const int64_t C1 =W1; | |
186 const int64_t C2 =W2; | |
187 const int64_t C3 =W3; | |
188 const int64_t C4 =W4; | |
189 const int64_t C5 =W5; | |
190 const int64_t C6 =W6; | |
191 const int64_t C7 =W7; | |
192 /* | |
193 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) { | |
194 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] = | |
195 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3; | |
196 return; | |
197 }*/ | |
198 in[0] += (1<<(COL_SHIFT-1))/W4; | |
199 a0 = C4*in[8*0] + C2*in[8*2] + C4*in[8*4] + C6*in[8*6]; | |
200 a1 = C4*in[8*0] + C6*in[8*2] - C4*in[8*4] - C2*in[8*6]; | |
201 a2 = C4*in[8*0] - C6*in[8*2] - C4*in[8*4] + C2*in[8*6]; | |
202 a3 = C4*in[8*0] - C2*in[8*2] + C4*in[8*4] - C6*in[8*6]; | |
203 | |
204 b0 = C1*in[8*1] + C3*in[8*3] + C5*in[8*5] + C7*in[8*7]; | |
205 b1 = C3*in[8*1] - C7*in[8*3] - C1*in[8*5] - C5*in[8*7]; | |
206 b2 = C5*in[8*1] - C1*in[8*3] + C7*in[8*5] + C3*in[8*7]; | |
207 b3 = C7*in[8*1] - C5*in[8*3] + C3*in[8*5] - C1*in[8*7]; | |
208 | |
209 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
210 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
211 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
212 col[8*3] = (a3 + b3) >> COL_SHIFT; | 336 col[8*3] = (a3 + b3) >> COL_SHIFT; |
213 col[8*4] = (a3 - b3) >> COL_SHIFT; | 337 col[8*4] = (a3 - b3) >> COL_SHIFT; |
214 col[8*5] = (a2 - b2) >> COL_SHIFT; | 338 } |
215 col[8*6] = (a1 - b1) >> COL_SHIFT; | 339 |
216 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
217 } | |
218 | 340 |
219 void simple_idct (short *block) | 341 void simple_idct (short *block) |
220 { | 342 { |
343 | |
221 int i; | 344 int i; |
222 int32_t temp[64]; | 345 |
346 #if 0 | |
347 int nonZero[8]; | |
348 int buffer[64]; | |
349 int nNonZero=0; | |
350 | |
351 idctRowCondDC(block); | |
352 | |
353 for(i=1; i<8; i++) | |
354 { | |
355 nonZero[nNonZero]=i; | |
356 nNonZero+= idctRowCondZ(block + i*8); | |
357 } | |
358 | |
359 if(nNonZero==0) | |
360 { | |
361 for(i=0; i<8; i++) | |
362 { | |
363 block[i ]= | |
364 block[i+8 ]= | |
365 block[i+16]= | |
366 block[i+24]= | |
367 block[i+32]= | |
368 block[i+40]= | |
369 block[i+48]= | |
370 block[i+56]= (W4*block[i] + (1<<(COL_SHIFT-1))) >> COL_SHIFT; | |
371 } | |
372 } | |
373 else if(nNonZero==1) | |
374 { | |
375 int index= nonZero[0]*8; | |
376 for(i=0; i<8; i++) | |
377 { | |
378 int bias= W4*block[i] + (1<<(COL_SHIFT-1)); | |
379 int c= block[i + index]; | |
380 block[i ]= (c*coeff[index ] + bias) >> COL_SHIFT; | |
381 block[i+8 ]= (c*coeff[index+1] + bias) >> COL_SHIFT; | |
382 block[i+16]= (c*coeff[index+2] + bias) >> COL_SHIFT; | |
383 block[i+24]= (c*coeff[index+3] + bias) >> COL_SHIFT; | |
384 block[i+32]= (c*coeff[index+4] + bias) >> COL_SHIFT; | |
385 block[i+40]= (c*coeff[index+5] + bias) >> COL_SHIFT; | |
386 block[i+48]= (c*coeff[index+6] + bias) >> COL_SHIFT; | |
387 block[i+56]= (c*coeff[index+7] + bias) >> COL_SHIFT; | |
388 } | |
389 } | |
390 /* else if(nNonZero==2) | |
391 { | |
392 int index1= nonZero[0]*8; | |
393 int index2= nonZero[1]*8; | |
394 for(i=0; i<8; i++) | |
395 { | |
396 int bias= W4*block[i] + (1<<(COL_SHIFT-1)); | |
397 int c1= block[i + index1]; | |
398 int c2= block[i + index2]; | |
399 block[i ]= (c1*coeff[index1 ] + c2*coeff[index2 ] + bias) >> COL_SHIFT; | |
400 block[i+8 ]= (c1*coeff[index1+1] + c2*coeff[index2+1] + bias) >> COL_SHIFT; | |
401 block[i+16]= (c1*coeff[index1+2] + c2*coeff[index2+2] + bias) >> COL_SHIFT; | |
402 block[i+24]= (c1*coeff[index1+3] + c2*coeff[index2+3] + bias) >> COL_SHIFT; | |
403 block[i+32]= (c1*coeff[index1+4] + c2*coeff[index2+4] + bias) >> COL_SHIFT; | |
404 block[i+40]= (c1*coeff[index1+5] + c2*coeff[index2+5] + bias) >> COL_SHIFT; | |
405 block[i+48]= (c1*coeff[index1+6] + c2*coeff[index2+6] + bias) >> COL_SHIFT; | |
406 block[i+56]= (c1*coeff[index1+7] + c2*coeff[index2+7] + bias) >> COL_SHIFT; | |
407 } | |
408 }*/ | |
409 else | |
410 { | |
411 for(i=0; i<8; i++) | |
412 idctSparse2Col(block + i); | |
413 } | |
414 #else | |
223 for(i=0; i<8; i++) | 415 for(i=0; i<8; i++) |
224 idctRow(temp+8*i, block + 8*i); | 416 idctRowCondDC(block + i*8); |
225 | 417 |
226 for(i=0; i<8; i++) | 418 for(i=0; i<8; i++) |
227 idctCol(temp+i, block + i); | 419 idctSparseCol(block + i); |
228 | |
229 } | |
230 | |
231 #endif | 420 #endif |
421 } |