Mercurial > libavcodec.hg
comparison simple_idct.c @ 476:ec13b0a726c3 libavcodec
removed unused code - began to merge Alpha-specific stuff - added MAC (multiply-accumulate) macros for suitable CPUs
author | bellard |
---|---|
date | Wed, 05 Jun 2002 14:25:06 +0000 |
parents | 9b73bce5071a |
children | 40ffce2cb6ef |
comparison
equal
deleted
inserted
replaced
475:6ccc48e4c41b | 476:ec13b0a726c3 |
---|---|
45 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 | 45 #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
46 #define ROW_SHIFT 11 | 46 #define ROW_SHIFT 11 |
47 #define COL_SHIFT 20 // 6 | 47 #define COL_SHIFT 20 // 6 |
48 #endif | 48 #endif |
49 | 49 |
50 /* 8x8 Matrix used to do a trivial (slow) 8 point IDCT */ | 50 #ifdef ARCH_ALPHA |
51 static int coeff[64]={ | 51 #define FAST_64BIT |
52 W4, W4, W4, W4, W4, W4, W4, W4, | 52 #endif |
53 W1, W3, W5, W7,-W7,-W5,-W3,-W1, | 53 |
54 W2, W6,-W6,-W2,-W2,-W6, W6, W2, | 54 #if defined(ARCH_POWERPC_405) |
55 W3,-W7,-W1,-W5, W5, W1, W7,-W3, | 55 |
56 W4,-W4,-W4, W4, W4,-W4,-W4, W4, | 56 /* signed 16x16 -> 32 multiply add accumulate */ |
57 W5,-W1, W7, W3,-W3,-W7, W1,-W5, | 57 #define MAC16(rt, ra, rb) \ |
58 W6,-W2, W2,-W6,-W6, W2,-W2, W6, | 58 asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); |
59 W7,-W5, W3,-W1, W1,-W3, W5,-W7 | 59 |
60 }; | 60 /* signed 16x16 -> 32 multiply */ |
61 | 61 #define MUL16(rt, ra, rb) \ |
62 static inline int idctRowCondZ (int16_t * row) | 62 asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); |
63 { | 63 |
64 int a0, a1, a2, a3, b0, b1, b2, b3; | 64 #else |
65 | 65 |
66 if( !( ((uint32_t*)row)[0]|((uint32_t*)row)[1] |((uint32_t*)row)[2] |((uint32_t*)row)[3])) { | 66 /* signed 16x16 -> 32 multiply add accumulate */ |
67 /* row[0] = row[1] = row[2] = row[3] = row[4] = | 67 #define MAC16(rt, ra, rb) rt += (ra) * (rb) |
68 row[5] = row[6] = row[7] = 0;*/ | 68 |
69 return 0; | 69 /* signed 16x16 -> 32 multiply */ |
70 } | 70 #define MUL16(rt, ra, rb) rt = (ra) * (rb) |
71 | 71 |
72 if(!( ((uint32_t*)row)[2] |((uint32_t*)row)[3] )){ | 72 #endif |
73 a0 = W4*row[0] + W2*row[2] + (1<<(ROW_SHIFT-1)); | |
74 a1 = W4*row[0] + W6*row[2] + (1<<(ROW_SHIFT-1)); | |
75 a2 = W4*row[0] - W6*row[2] + (1<<(ROW_SHIFT-1)); | |
76 a3 = W4*row[0] - W2*row[2] + (1<<(ROW_SHIFT-1)); | |
77 | |
78 b0 = W1*row[1] + W3*row[3]; | |
79 b1 = W3*row[1] - W7*row[3]; | |
80 b2 = W5*row[1] - W1*row[3]; | |
81 b3 = W7*row[1] - W5*row[3]; | |
82 }else{ | |
83 a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + (1<<(ROW_SHIFT-1)); | |
84 a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + (1<<(ROW_SHIFT-1)); | |
85 a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + (1<<(ROW_SHIFT-1)); | |
86 a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + (1<<(ROW_SHIFT-1)); | |
87 | |
88 b0 = W1*row[1] + W3*row[3] + W5*row[5] + W7*row[7]; | |
89 b1 = W3*row[1] - W7*row[3] - W1*row[5] - W5*row[7]; | |
90 b2 = W5*row[1] - W1*row[3] + W7*row[5] + W3*row[7]; | |
91 b3 = W7*row[1] - W5*row[3] + W3*row[5] - W1*row[7]; | |
92 } | |
93 | |
94 row[0] = (a0 + b0) >> ROW_SHIFT; | |
95 row[1] = (a1 + b1) >> ROW_SHIFT; | |
96 row[2] = (a2 + b2) >> ROW_SHIFT; | |
97 row[3] = (a3 + b3) >> ROW_SHIFT; | |
98 row[4] = (a3 - b3) >> ROW_SHIFT; | |
99 row[5] = (a2 - b2) >> ROW_SHIFT; | |
100 row[6] = (a1 - b1) >> ROW_SHIFT; | |
101 row[7] = (a0 - b0) >> ROW_SHIFT; | |
102 | |
103 return 1; | |
104 } | |
105 | 73 |
106 #ifdef ARCH_ALPHA | 74 #ifdef ARCH_ALPHA |
107 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ | 75 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ |
108 static inline int idctRowCondDC(int16_t *row) | 76 static inline int idctRowCondDC(int16_t *row) |
109 { | 77 { |
127 | 95 |
128 return 1; | 96 return 1; |
129 } | 97 } |
130 } | 98 } |
131 | 99 |
132 a0 = W4 * row[0]; | 100 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
133 a1 = W4 * row[0]; | 101 a1 = a0; |
134 a2 = W4 * row[0]; | 102 a2 = a0; |
135 a3 = W4 * row[0]; | 103 a3 = a0; |
136 | 104 |
137 if (row[2]) { | 105 if (row[2]) { |
138 a0 += W2 * row[2]; | 106 a0 += W2 * row[2]; |
139 a1 += W6 * row[2]; | 107 a1 += W6 * row[2]; |
140 a2 -= W6 * row[2]; | 108 a2 -= W6 * row[2]; |
152 a0 += W6 * row[6]; | 120 a0 += W6 * row[6]; |
153 a1 -= W2 * row[6]; | 121 a1 -= W2 * row[6]; |
154 a2 += W2 * row[6]; | 122 a2 += W2 * row[6]; |
155 a3 -= W6 * row[6]; | 123 a3 -= W6 * row[6]; |
156 } | 124 } |
157 | |
158 a0 += 1 << (ROW_SHIFT - 1); | |
159 a1 += 1 << (ROW_SHIFT - 1); | |
160 a2 += 1 << (ROW_SHIFT - 1); | |
161 a3 += 1 << (ROW_SHIFT - 1); | |
162 | 125 |
163 if (row[1]) { | 126 if (row[1]) { |
164 b0 = W1 * row[1]; | 127 b0 = W1 * row[1]; |
165 b1 = W3 * row[1]; | 128 b1 = W3 * row[1]; |
166 b2 = W5 * row[1]; | 129 b2 = W5 * row[1]; |
203 row[7] = (a0 - b0) >> ROW_SHIFT; | 166 row[7] = (a0 - b0) >> ROW_SHIFT; |
204 | 167 |
205 return 2; | 168 return 2; |
206 } | 169 } |
207 #else /* not ARCH_ALPHA */ | 170 #else /* not ARCH_ALPHA */ |
208 static inline int idctRowCondDC (int16_t * row) | 171 |
172 static inline void idctRowCondDC (int16_t * row) | |
209 { | 173 { |
210 int a0, a1, a2, a3, b0, b1, b2, b3; | 174 int a0, a1, a2, a3, b0, b1, b2, b3; |
211 | 175 #ifdef FAST_64BIT |
212 if( !( ((uint32_t*)row)[1] |((uint32_t*)row)[2] |((uint32_t*)row)[3]| row[1])) { | 176 uint64_t temp; |
213 // row[0] = row[1] = row[2] = row[3] = row[4] = row[5] = row[6] = row[7] = row[0]<<3; | 177 #else |
214 uint16_t temp= row[0]<<3; | 178 uint32_t temp; |
215 ((uint32_t*)row)[0]=((uint32_t*)row)[1]= | 179 #endif |
216 ((uint32_t*)row)[2]=((uint32_t*)row)[3]= temp + (temp<<16); | 180 |
217 return 0; | 181 #ifdef FAST_64BIT |
218 } | 182 #ifdef WORDS_BIGENDIAN |
219 | 183 #define ROW0_MASK 0xffff000000000000LL |
220 if(!( ((uint32_t*)row)[2] |((uint32_t*)row)[3] )){ | 184 #else |
221 a0 = W4*row[0] + W2*row[2] + (1<<(ROW_SHIFT-1)); | 185 #define ROW0_MASK 0xffffLL |
222 a1 = W4*row[0] + W6*row[2] + (1<<(ROW_SHIFT-1)); | 186 #endif |
223 a2 = W4*row[0] - W6*row[2] + (1<<(ROW_SHIFT-1)); | 187 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
224 a3 = W4*row[0] - W2*row[2] + (1<<(ROW_SHIFT-1)); | 188 ((uint64_t *)row)[1]) == 0) { |
225 | 189 temp = (row[0] << 3) & 0xffff; |
226 b0 = W1*row[1] + W3*row[3]; | 190 temp += temp << 16; |
227 b1 = W3*row[1] - W7*row[3]; | 191 temp += temp << 32; |
228 b2 = W5*row[1] - W1*row[3]; | 192 ((uint64_t *)row)[0] = temp; |
229 b3 = W7*row[1] - W5*row[3]; | 193 ((uint64_t *)row)[1] = temp; |
230 }else{ | 194 return; |
231 a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + (1<<(ROW_SHIFT-1)); | 195 } |
232 a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + (1<<(ROW_SHIFT-1)); | 196 #else |
233 a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + (1<<(ROW_SHIFT-1)); | 197 if (!(((uint32_t*)row)[1] | |
234 a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + (1<<(ROW_SHIFT-1)); | 198 ((uint32_t*)row)[2] | |
235 | 199 ((uint32_t*)row)[3] | |
236 b0 = W1*row[1] + W3*row[3] + W5*row[5] + W7*row[7]; | 200 row[1])) { |
237 b1 = W3*row[1] - W7*row[3] - W1*row[5] - W5*row[7]; | 201 temp = (row[0] << 3) & 0xffff; |
238 b2 = W5*row[1] - W1*row[3] + W7*row[5] + W3*row[7]; | 202 temp += temp << 16; |
239 b3 = W7*row[1] - W5*row[3] + W3*row[5] - W1*row[7]; | 203 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
204 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; | |
205 return; | |
206 } | |
207 #endif | |
208 | |
209 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); | |
210 a1 = a0; | |
211 a2 = a0; | |
212 a3 = a0; | |
213 | |
214 /* no need to optimize : gcc does it */ | |
215 a0 += W2 * row[2]; | |
216 a1 += W6 * row[2]; | |
217 a2 -= W6 * row[2]; | |
218 a3 -= W2 * row[2]; | |
219 | |
220 MUL16(b0, W1, row[1]); | |
221 MAC16(b0, W3, row[3]); | |
222 MUL16(b1, W3, row[1]); | |
223 MAC16(b1, -W7, row[3]); | |
224 MUL16(b2, W5, row[1]); | |
225 MAC16(b2, -W1, row[3]); | |
226 MUL16(b3, W7, row[1]); | |
227 MAC16(b3, -W5, row[3]); | |
228 | |
229 #ifdef FAST_64BIT | |
230 temp = ((uint64_t*)row)[1]; | |
231 #else | |
232 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; | |
233 #endif | |
234 if (temp != 0) { | |
235 a0 += W4*row[4] + W6*row[6]; | |
236 a1 += - W4*row[4] - W2*row[6]; | |
237 a2 += - W4*row[4] + W2*row[6]; | |
238 a3 += W4*row[4] - W6*row[6]; | |
239 | |
240 MAC16(b0, W5, row[5]); | |
241 MAC16(b0, W7, row[7]); | |
242 | |
243 MAC16(b1, -W1, row[5]); | |
244 MAC16(b1, -W5, row[7]); | |
245 | |
246 MAC16(b2, W7, row[5]); | |
247 MAC16(b2, W3, row[7]); | |
248 | |
249 MAC16(b3, W3, row[5]); | |
250 MAC16(b3, -W1, row[7]); | |
240 } | 251 } |
241 | 252 |
242 row[0] = (a0 + b0) >> ROW_SHIFT; | 253 row[0] = (a0 + b0) >> ROW_SHIFT; |
243 row[7] = (a0 - b0) >> ROW_SHIFT; | 254 row[7] = (a0 - b0) >> ROW_SHIFT; |
244 row[1] = (a1 + b1) >> ROW_SHIFT; | 255 row[1] = (a1 + b1) >> ROW_SHIFT; |
245 row[6] = (a1 - b1) >> ROW_SHIFT; | 256 row[6] = (a1 - b1) >> ROW_SHIFT; |
246 row[2] = (a2 + b2) >> ROW_SHIFT; | 257 row[2] = (a2 + b2) >> ROW_SHIFT; |
247 row[5] = (a2 - b2) >> ROW_SHIFT; | 258 row[5] = (a2 - b2) >> ROW_SHIFT; |
248 row[3] = (a3 + b3) >> ROW_SHIFT; | 259 row[3] = (a3 + b3) >> ROW_SHIFT; |
249 row[4] = (a3 - b3) >> ROW_SHIFT; | 260 row[4] = (a3 - b3) >> ROW_SHIFT; |
250 | |
251 return 1; | |
252 } | 261 } |
253 #endif /* not ARCH_ALPHA */ | 262 #endif /* not ARCH_ALPHA */ |
254 | 263 |
255 static inline void idctCol (int16_t * col) | 264 static inline void idctSparseCol (int16_t * col) |
256 { | 265 { |
257 | |
258 /* | |
259 if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) { | |
260 col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] = | |
261 col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3; | |
262 return; | |
263 }*/ | |
264 | |
265 int a0, a1, a2, a3, b0, b1, b2, b3; | 266 int a0, a1, a2, a3, b0, b1, b2, b3; |
266 col[0] += (1<<(COL_SHIFT-1))/W4; | 267 |
267 a0 = W4*col[8*0] + W2*col[8*2] + W4*col[8*4] + W6*col[8*6]; | 268 /* XXX: I did that only to give same values as previous code */ |
268 a1 = W4*col[8*0] + W6*col[8*2] - W4*col[8*4] - W2*col[8*6]; | 269 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
269 a2 = W4*col[8*0] - W6*col[8*2] - W4*col[8*4] + W2*col[8*6]; | 270 a1 = a0; |
270 a3 = W4*col[8*0] - W2*col[8*2] + W4*col[8*4] - W6*col[8*6]; | 271 a2 = a0; |
271 | 272 a3 = a0; |
272 b0 = W1*col[8*1] + W3*col[8*3] + W5*col[8*5] + W7*col[8*7]; | 273 |
273 b1 = W3*col[8*1] - W7*col[8*3] - W1*col[8*5] - W5*col[8*7]; | 274 a0 += + W2*col[8*2]; |
274 b2 = W5*col[8*1] - W1*col[8*3] + W7*col[8*5] + W3*col[8*7]; | 275 a1 += + W6*col[8*2]; |
275 b3 = W7*col[8*1] - W5*col[8*3] + W3*col[8*5] - W1*col[8*7]; | 276 a2 += - W6*col[8*2]; |
276 | 277 a3 += - W2*col[8*2]; |
277 col[8*0] = (a0 + b0) >> COL_SHIFT; | 278 |
278 col[8*7] = (a0 - b0) >> COL_SHIFT; | 279 MUL16(b0, W1, col[8*1]); |
279 col[8*1] = (a1 + b1) >> COL_SHIFT; | 280 MUL16(b1, W3, col[8*1]); |
280 col[8*6] = (a1 - b1) >> COL_SHIFT; | 281 MUL16(b2, W5, col[8*1]); |
281 col[8*2] = (a2 + b2) >> COL_SHIFT; | 282 MUL16(b3, W7, col[8*1]); |
282 col[8*5] = (a2 - b2) >> COL_SHIFT; | 283 |
283 col[8*3] = (a3 + b3) >> COL_SHIFT; | 284 MAC16(b0, + W3, col[8*3]); |
284 col[8*4] = (a3 - b3) >> COL_SHIFT; | 285 MAC16(b1, - W7, col[8*3]); |
285 } | 286 MAC16(b2, - W1, col[8*3]); |
286 | 287 MAC16(b3, - W5, col[8*3]); |
287 static inline void idctSparseCol (int16_t * col) | |
288 { | |
289 int a0, a1, a2, a3, b0, b1, b2, b3; | |
290 col[0] += (1<<(COL_SHIFT-1))/W4; | |
291 a0 = W4*col[8*0]; | |
292 a1 = W4*col[8*0]; | |
293 a2 = W4*col[8*0]; | |
294 a3 = W4*col[8*0]; | |
295 | |
296 if(col[8*2]){ | |
297 a0 += + W2*col[8*2]; | |
298 a1 += + W6*col[8*2]; | |
299 a2 += - W6*col[8*2]; | |
300 a3 += - W2*col[8*2]; | |
301 } | |
302 | 288 |
303 if(col[8*4]){ | 289 if(col[8*4]){ |
304 a0 += + W4*col[8*4]; | 290 a0 += + W4*col[8*4]; |
305 a1 += - W4*col[8*4]; | 291 a1 += - W4*col[8*4]; |
306 a2 += - W4*col[8*4]; | 292 a2 += - W4*col[8*4]; |
307 a3 += + W4*col[8*4]; | 293 a3 += + W4*col[8*4]; |
294 } | |
295 | |
296 if (col[8*5]) { | |
297 MAC16(b0, + W5, col[8*5]); | |
298 MAC16(b1, - W1, col[8*5]); | |
299 MAC16(b2, + W7, col[8*5]); | |
300 MAC16(b3, + W3, col[8*5]); | |
308 } | 301 } |
309 | 302 |
310 if(col[8*6]){ | 303 if(col[8*6]){ |
311 a0 += + W6*col[8*6]; | 304 a0 += + W6*col[8*6]; |
312 a1 += - W2*col[8*6]; | 305 a1 += - W2*col[8*6]; |
313 a2 += + W2*col[8*6]; | 306 a2 += + W2*col[8*6]; |
314 a3 += - W6*col[8*6]; | 307 a3 += - W6*col[8*6]; |
315 } | 308 } |
316 | 309 |
317 if(col[8*1]){ | 310 if (col[8*7]) { |
318 b0 = W1*col[8*1]; | 311 MAC16(b0, + W7, col[8*7]); |
319 b1 = W3*col[8*1]; | 312 MAC16(b1, - W5, col[8*7]); |
320 b2 = W5*col[8*1]; | 313 MAC16(b2, + W3, col[8*7]); |
321 b3 = W7*col[8*1]; | 314 MAC16(b3, - W1, col[8*7]); |
322 }else{ | 315 } |
323 b0 = | 316 |
324 b1 = | 317 col[8*0] = (a0 + b0) >> COL_SHIFT; |
325 b2 = | 318 col[8*7] = (a0 - b0) >> COL_SHIFT; |
326 b3 = 0; | 319 col[8*1] = (a1 + b1) >> COL_SHIFT; |
327 } | 320 col[8*6] = (a1 - b1) >> COL_SHIFT; |
328 | 321 col[8*2] = (a2 + b2) >> COL_SHIFT; |
329 if(col[8*3]){ | 322 col[8*5] = (a2 - b2) >> COL_SHIFT; |
330 b0 += + W3*col[8*3]; | 323 col[8*3] = (a3 + b3) >> COL_SHIFT; |
331 b1 += - W7*col[8*3]; | 324 col[8*4] = (a3 - b3) >> COL_SHIFT; |
332 b2 += - W1*col[8*3]; | |
333 b3 += - W5*col[8*3]; | |
334 } | |
335 | |
336 if(col[8*5]){ | |
337 b0 += + W5*col[8*5]; | |
338 b1 += - W1*col[8*5]; | |
339 b2 += + W7*col[8*5]; | |
340 b3 += + W3*col[8*5]; | |
341 } | |
342 | |
343 if(col[8*7]){ | |
344 b0 += + W7*col[8*7]; | |
345 b1 += - W5*col[8*7]; | |
346 b2 += + W3*col[8*7]; | |
347 b3 += - W1*col[8*7]; | |
348 } | |
349 | |
350 #ifndef ARCH_ALPHA | |
351 if(!(b0|b1|b2|b3)){ | |
352 col[8*0] = (a0) >> COL_SHIFT; | |
353 col[8*7] = (a0) >> COL_SHIFT; | |
354 col[8*1] = (a1) >> COL_SHIFT; | |
355 col[8*6] = (a1) >> COL_SHIFT; | |
356 col[8*2] = (a2) >> COL_SHIFT; | |
357 col[8*5] = (a2) >> COL_SHIFT; | |
358 col[8*3] = (a3) >> COL_SHIFT; | |
359 col[8*4] = (a3) >> COL_SHIFT; | |
360 }else{ | |
361 #endif | |
362 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
363 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
364 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
365 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
366 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
367 col[8*5] = (a2 - b2) >> COL_SHIFT; | |
368 col[8*3] = (a3 + b3) >> COL_SHIFT; | |
369 col[8*4] = (a3 - b3) >> COL_SHIFT; | |
370 #ifndef ARCH_ALPHA | |
371 } | |
372 #endif | |
373 } | |
374 | |
375 static inline void idctSparse2Col (int16_t * col) | |
376 { | |
377 int a0, a1, a2, a3, b0, b1, b2, b3; | |
378 col[0] += (1<<(COL_SHIFT-1))/W4; | |
379 a0 = W4*col[8*0]; | |
380 a1 = W4*col[8*0]; | |
381 a2 = W4*col[8*0]; | |
382 a3 = W4*col[8*0]; | |
383 | |
384 if(col[8*2]){ | |
385 a0 += + W2*col[8*2]; | |
386 a1 += + W6*col[8*2]; | |
387 a2 += - W6*col[8*2]; | |
388 a3 += - W2*col[8*2]; | |
389 } | |
390 | |
391 if(col[8*4]){ | |
392 a0 += + W4*col[8*4]; | |
393 a1 += - W4*col[8*4]; | |
394 a2 += - W4*col[8*4]; | |
395 a3 += + W4*col[8*4]; | |
396 } | |
397 | |
398 if(col[8*6]){ | |
399 a0 += + W6*col[8*6]; | |
400 a1 += - W2*col[8*6]; | |
401 a2 += + W2*col[8*6]; | |
402 a3 += - W6*col[8*6]; | |
403 } | |
404 | |
405 if(col[8*1] || 1){ | |
406 b0 = W1*col[8*1]; | |
407 b1 = W3*col[8*1]; | |
408 b2 = W5*col[8*1]; | |
409 b3 = W7*col[8*1]; | |
410 }else{ | |
411 b0 = | |
412 b1 = | |
413 b2 = | |
414 b3 = 0; | |
415 } | |
416 | |
417 if(col[8*3]){ | |
418 b0 += + W3*col[8*3]; | |
419 b1 += - W7*col[8*3]; | |
420 b2 += - W1*col[8*3]; | |
421 b3 += - W5*col[8*3]; | |
422 } | |
423 | |
424 if(col[8*5]){ | |
425 b0 += + W5*col[8*5]; | |
426 b1 += - W1*col[8*5]; | |
427 b2 += + W7*col[8*5]; | |
428 b3 += + W3*col[8*5]; | |
429 } | |
430 | |
431 if(col[8*7]){ | |
432 b0 += + W7*col[8*7]; | |
433 b1 += - W5*col[8*7]; | |
434 b2 += + W3*col[8*7]; | |
435 b3 += - W1*col[8*7]; | |
436 } | |
437 | |
438 col[8*0] = (a0 + b0) >> COL_SHIFT; | |
439 col[8*7] = (a0 - b0) >> COL_SHIFT; | |
440 col[8*1] = (a1 + b1) >> COL_SHIFT; | |
441 col[8*6] = (a1 - b1) >> COL_SHIFT; | |
442 col[8*2] = (a2 + b2) >> COL_SHIFT; | |
443 col[8*5] = (a2 - b2) >> COL_SHIFT; | |
444 col[8*3] = (a3 + b3) >> COL_SHIFT; | |
445 col[8*4] = (a3 - b3) >> COL_SHIFT; | |
446 } | 325 } |
447 | 326 |
448 #ifdef ARCH_ALPHA | 327 #ifdef ARCH_ALPHA |
449 /* If all rows but the first one are zero after row transformation, | 328 /* If all rows but the first one are zero after row transformation, |
450 all rows will be identical after column transformation. */ | 329 all rows will be identical after column transformation. */ |
470 lcol[ 8] = l; lcol[ 9] = r; | 349 lcol[ 8] = l; lcol[ 9] = r; |
471 lcol[10] = l; lcol[11] = r; | 350 lcol[10] = l; lcol[11] = r; |
472 lcol[12] = l; lcol[13] = r; | 351 lcol[12] = l; lcol[13] = r; |
473 lcol[14] = l; lcol[15] = r; | 352 lcol[14] = l; lcol[15] = r; |
474 } | 353 } |
475 #endif | |
476 | 354 |
477 void simple_idct (short *block) | 355 void simple_idct (short *block) |
478 { | 356 { |
479 | 357 |
480 int i; | 358 int i; |
481 | |
482 #if 0 | |
483 int nonZero[8]; | |
484 int buffer[64]; | |
485 int nNonZero=0; | |
486 | |
487 idctRowCondDC(block); | |
488 | |
489 for(i=1; i<8; i++) | |
490 { | |
491 nonZero[nNonZero]=i; | |
492 nNonZero+= idctRowCondZ(block + i*8); | |
493 } | |
494 | |
495 if(nNonZero==0) | |
496 { | |
497 for(i=0; i<8; i++) | |
498 { | |
499 block[i ]= | |
500 block[i+8 ]= | |
501 block[i+16]= | |
502 block[i+24]= | |
503 block[i+32]= | |
504 block[i+40]= | |
505 block[i+48]= | |
506 block[i+56]= (W4*block[i] + (1<<(COL_SHIFT-1))) >> COL_SHIFT; | |
507 } | |
508 } | |
509 else if(nNonZero==1) | |
510 { | |
511 int index= nonZero[0]*8; | |
512 for(i=0; i<8; i++) | |
513 { | |
514 int bias= W4*block[i] + (1<<(COL_SHIFT-1)); | |
515 int c= block[i + index]; | |
516 block[i ]= (c*coeff[index ] + bias) >> COL_SHIFT; | |
517 block[i+8 ]= (c*coeff[index+1] + bias) >> COL_SHIFT; | |
518 block[i+16]= (c*coeff[index+2] + bias) >> COL_SHIFT; | |
519 block[i+24]= (c*coeff[index+3] + bias) >> COL_SHIFT; | |
520 block[i+32]= (c*coeff[index+4] + bias) >> COL_SHIFT; | |
521 block[i+40]= (c*coeff[index+5] + bias) >> COL_SHIFT; | |
522 block[i+48]= (c*coeff[index+6] + bias) >> COL_SHIFT; | |
523 block[i+56]= (c*coeff[index+7] + bias) >> COL_SHIFT; | |
524 } | |
525 } | |
526 /* else if(nNonZero==2) | |
527 { | |
528 int index1= nonZero[0]*8; | |
529 int index2= nonZero[1]*8; | |
530 for(i=0; i<8; i++) | |
531 { | |
532 int bias= W4*block[i] + (1<<(COL_SHIFT-1)); | |
533 int c1= block[i + index1]; | |
534 int c2= block[i + index2]; | |
535 block[i ]= (c1*coeff[index1 ] + c2*coeff[index2 ] + bias) >> COL_SHIFT; | |
536 block[i+8 ]= (c1*coeff[index1+1] + c2*coeff[index2+1] + bias) >> COL_SHIFT; | |
537 block[i+16]= (c1*coeff[index1+2] + c2*coeff[index2+2] + bias) >> COL_SHIFT; | |
538 block[i+24]= (c1*coeff[index1+3] + c2*coeff[index2+3] + bias) >> COL_SHIFT; | |
539 block[i+32]= (c1*coeff[index1+4] + c2*coeff[index2+4] + bias) >> COL_SHIFT; | |
540 block[i+40]= (c1*coeff[index1+5] + c2*coeff[index2+5] + bias) >> COL_SHIFT; | |
541 block[i+48]= (c1*coeff[index1+6] + c2*coeff[index2+6] + bias) >> COL_SHIFT; | |
542 block[i+56]= (c1*coeff[index1+7] + c2*coeff[index2+7] + bias) >> COL_SHIFT; | |
543 } | |
544 }*/ | |
545 else | |
546 { | |
547 for(i=0; i<8; i++) | |
548 idctSparse2Col(block + i); | |
549 } | |
550 #elif defined(ARCH_ALPHA) | |
551 int rowsZero = 1; /* all rows except row 0 zero */ | 359 int rowsZero = 1; /* all rows except row 0 zero */ |
552 int rowsConstant = 1; /* all rows consist of a constant value */ | 360 int rowsConstant = 1; /* all rows consist of a constant value */ |
553 | 361 |
554 for (i = 0; i < 8; i++) { | 362 for (i = 0; i < 8; i++) { |
555 int sparseness = idctRowCondDC(block + 8 * i); | 363 int sparseness = idctRowCondDC(block + 8 * i); |
577 } | 385 } |
578 } else { | 386 } else { |
579 for (i = 0; i < 8; i++) | 387 for (i = 0; i < 8; i++) |
580 idctSparseCol(block + i); | 388 idctSparseCol(block + i); |
581 } | 389 } |
582 #else | 390 } |
583 for(i=0; i<8; i++) | 391 |
584 idctRowCondDC(block + i*8); | 392 #else |
585 | 393 |
586 for(i=0; i<8; i++) | 394 void simple_idct (short *block) |
587 idctSparseCol(block + i); | 395 { |
588 #endif | 396 int i; |
589 } | 397 for(i=0; i<8; i++) |
398 idctRowCondDC(block + i*8); | |
399 | |
400 for(i=0; i<8; i++) | |
401 idctSparseCol(block + i); | |
402 } | |
403 | |
404 #endif | |
590 | 405 |
591 #undef COL_SHIFT | 406 #undef COL_SHIFT |