Mercurial > libavcodec.hg
comparison vp3dsp.c @ 2693:02925a3903b6 libavcodec
porting vp3 idct over to lavc idct api
author | michael |
---|---|
date | Tue, 17 May 2005 11:12:04 +0000 |
parents | fb8f26fc57eb |
children | fd5d7c732c6b |
comparison legend: equal | deleted | inserted | replaced
2692:fb8f26fc57eb | 2693:02925a3903b6 |
---|---|
34 #define xC4S4 46341 | 34 #define xC4S4 46341 |
35 #define xC5S3 36410 | 35 #define xC5S3 36410 |
36 #define xC6S2 25080 | 36 #define xC6S2 25080 |
37 #define xC7S1 12785 | 37 #define xC7S1 12785 |
38 | 38 |
39 void vp3_dsp_init_c(void) | 39 static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) |
40 { | 40 { |
41 /* nop */ | 41 int16_t *ip = input; |
42 } | 42 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
43 | 43 |
44 void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, | 44 int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_; |
45 int coeff_count, int16_t *output_data) | 45 int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; |
46 { | 46 int t1, t2; |
47 int32_t dequantized_data[64]; | |
48 int32_t *ip = dequantized_data; | |
49 int16_t *op = output_data; | |
50 | |
51 int32_t A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_; | |
52 int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; | |
53 int32_t t1, t2; | |
54 | 47 |
55 int i, j; | 48 int i, j; |
56 | 49 |
57 /* de-zigzag and dequantize */ | |
58 for (i = 0; i < coeff_count; i++) { | |
59 dequantized_data[i] = dequant_matrix[i] * input_data[i]; | |
60 } | |
61 | |
62 /* Inverse DCT on the rows now */ | 50 /* Inverse DCT on the rows now */ |
63 for (i = 0; i < 8; i++) { | 51 for (i = 0; i < 8; i++) { |
64 /* Check for non-zero values */ | 52 /* Check for non-zero values */ |
65 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { | 53 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { |
66 t1 = (int32_t)(xC1S7 * ip[1]); | 54 t1 = (int32_t)(xC1S7 * ip[1]); |
129 | 117 |
130 _Fd = F_ - _Ad; | 118 _Fd = F_ - _Ad; |
131 _Hd = _Bd + H_; | 119 _Hd = _Bd + H_; |
132 | 120 |
133 /* Final sequence of operations over-write original inputs. */ | 121 /* Final sequence of operations over-write original inputs. */ |
134 ip[0] = (int16_t)((_Gd + _Cd ) >> 0); | 122 ip[0] = _Gd + _Cd ; |
135 ip[7] = (int16_t)((_Gd - _Cd ) >> 0); | 123 ip[7] = _Gd - _Cd ; |
136 | 124 |
137 ip[1] = (int16_t)((_Add + _Hd ) >> 0); | 125 ip[1] = _Add + _Hd; |
138 ip[2] = (int16_t)((_Add - _Hd ) >> 0); | 126 ip[2] = _Add - _Hd; |
139 | 127 |
140 ip[3] = (int16_t)((_Ed + _Dd ) >> 0); | 128 ip[3] = _Ed + _Dd ; |
141 ip[4] = (int16_t)((_Ed - _Dd ) >> 0); | 129 ip[4] = _Ed - _Dd ; |
142 | 130 |
143 ip[5] = (int16_t)((_Fd + _Bdd ) >> 0); | 131 ip[5] = _Fd + _Bdd; |
144 ip[6] = (int16_t)((_Fd - _Bdd ) >> 0); | 132 ip[6] = _Fd - _Bdd; |
145 | 133 |
146 } | 134 } |
147 | 135 |
148 ip += 8; /* next row */ | 136 ip += 8; /* next row */ |
149 } | 137 } |
150 | 138 |
151 ip = dequantized_data; | 139 ip = input; |
152 | 140 |
153 for ( i = 0; i < 8; i++) { | 141 for ( i = 0; i < 8; i++) { |
154 /* Check for non-zero values (bitwise or faster than ||) */ | 142 /* Check for non-zero values (bitwise or faster than ||) */ |
155 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | | 143 if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | |
156 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { | 144 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { |
157 | 145 |
158 t1 = (int32_t)(xC1S7 * ip[1*8]); | 146 t1 = (int32_t)(xC1S7 * ip[1*8]); |
159 t2 = (int32_t)(xC7S1 * ip[7*8]); | 147 t2 = (int32_t)(xC7S1 * ip[7*8]); |
160 t1 >>= 16; | 148 t1 >>= 16; |
220 _Bdd = _Bd - H_; | 208 _Bdd = _Bd - H_; |
221 | 209 |
222 _Fd = F_ - _Ad; | 210 _Fd = F_ - _Ad; |
223 _Hd = _Bd + H_; | 211 _Hd = _Bd + H_; |
224 | 212 |
213 if(type==1){ //HACK | |
214 _Gd += 16*128; | |
215 _Add+= 16*128; | |
216 _Ed += 16*128; | |
217 _Fd += 16*128; | |
218 } | |
225 _Gd += IdctAdjustBeforeShift; | 219 _Gd += IdctAdjustBeforeShift; |
226 _Add += IdctAdjustBeforeShift; | 220 _Add += IdctAdjustBeforeShift; |
227 _Ed += IdctAdjustBeforeShift; | 221 _Ed += IdctAdjustBeforeShift; |
228 _Fd += IdctAdjustBeforeShift; | 222 _Fd += IdctAdjustBeforeShift; |
229 | 223 |
230 /* Final sequence of operations over-write original inputs. */ | 224 /* Final sequence of operations over-write original inputs. */ |
231 op[0*8] = (int16_t)((_Gd + _Cd ) >> 4); | 225 if(type==0){ |
232 op[7*8] = (int16_t)((_Gd - _Cd ) >> 4); | 226 ip[0*8] = (_Gd + _Cd ) >> 4; |
233 | 227 ip[7*8] = (_Gd - _Cd ) >> 4; |
234 op[1*8] = (int16_t)((_Add + _Hd ) >> 4); | 228 |
235 op[2*8] = (int16_t)((_Add - _Hd ) >> 4); | 229 ip[1*8] = (_Add + _Hd ) >> 4; |
236 | 230 ip[2*8] = (_Add - _Hd ) >> 4; |
237 op[3*8] = (int16_t)((_Ed + _Dd ) >> 4); | 231 |
238 op[4*8] = (int16_t)((_Ed - _Dd ) >> 4); | 232 ip[3*8] = (_Ed + _Dd ) >> 4; |
239 | 233 ip[4*8] = (_Ed - _Dd ) >> 4; |
240 op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4); | 234 |
241 op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4); | 235 ip[5*8] = (_Fd + _Bdd ) >> 4; |
236 ip[6*8] = (_Fd - _Bdd ) >> 4; | |
237 }else if(type==1){ | |
238 dst[0*stride] = cm[(_Gd + _Cd ) >> 4]; | |
239 dst[7*stride] = cm[(_Gd - _Cd ) >> 4]; | |
240 | |
241 dst[1*stride] = cm[(_Add + _Hd ) >> 4]; | |
242 dst[2*stride] = cm[(_Add - _Hd ) >> 4]; | |
243 | |
244 dst[3*stride] = cm[(_Ed + _Dd ) >> 4]; | |
245 dst[4*stride] = cm[(_Ed - _Dd ) >> 4]; | |
246 | |
247 dst[5*stride] = cm[(_Fd + _Bdd ) >> 4]; | |
248 dst[6*stride] = cm[(_Fd - _Bdd ) >> 4]; | |
249 }else{ | |
250 dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd ) >> 4)]; | |
251 dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd ) >> 4)]; | |
252 | |
253 dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)]; | |
254 dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)]; | |
255 | |
256 dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd ) >> 4)]; | |
257 dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd ) >> 4)]; | |
258 | |
259 dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)]; | |
260 dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)]; | |
261 } | |
242 | 262 |
243 } else { | 263 } else { |
244 | 264 if(type==0){ |
245 op[0*8] = 0; | 265 ip[0*8] = |
246 op[7*8] = 0; | 266 ip[1*8] = |
247 op[1*8] = 0; | 267 ip[2*8] = |
248 op[2*8] = 0; | 268 ip[3*8] = |
249 op[3*8] = 0; | 269 ip[4*8] = |
250 op[4*8] = 0; | 270 ip[5*8] = |
251 op[5*8] = 0; | 271 ip[6*8] = |
252 op[6*8] = 0; | 272 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); |
273 }else if(type==1){ | |
274 dst[0*stride]= | |
275 dst[1*stride]= | |
276 dst[2*stride]= | |
277 dst[3*stride]= | |
278 dst[4*stride]= | |
279 dst[5*stride]= | |
280 dst[6*stride]= | |
281 dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); | |
282 }else{ | |
283 if(ip[0*8]){ | |
284 int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); | |
285 dst[0*stride] = cm[dst[0*stride] + v]; | |
286 dst[1*stride] = cm[dst[1*stride] + v]; | |
287 dst[2*stride] = cm[dst[2*stride] + v]; | |
288 dst[3*stride] = cm[dst[3*stride] + v]; | |
289 dst[4*stride] = cm[dst[4*stride] + v]; | |
290 dst[5*stride] = cm[dst[5*stride] + v]; | |
291 dst[6*stride] = cm[dst[6*stride] + v]; | |
292 dst[7*stride] = cm[dst[7*stride] + v]; | |
293 } | |
294 } | |
253 } | 295 } |
254 | 296 |
255 ip++; /* next column */ | 297 ip++; /* next column */ |
256 op++; | 298 dst++; |
257 } | 299 } |
258 } | 300 } |
301 | |
302 void ff_vp3_idct_c(DCTELEM *block/* align 16*/){ | |
303 idct(NULL, 0, block, 0); | |
304 } | |
305 | |
306 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ | |
307 idct(dest, line_size, block, 1); | |
308 } | |
309 | |
310 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ | |
311 idct(dest, line_size, block, 2); | |
312 } |