comparison vp3dsp.c @ 2693:02925a3903b6 libavcodec

porting vp3 idct over to lavc idct api
author michael
date Tue, 17 May 2005 11:12:04 +0000
parents fb8f26fc57eb
children fd5d7c732c6b
comparison
equal deleted inserted replaced
2692:fb8f26fc57eb 2693:02925a3903b6
34 #define xC4S4 46341 34 #define xC4S4 46341
35 #define xC5S3 36410 35 #define xC5S3 36410
36 #define xC6S2 25080 36 #define xC6S2 25080
37 #define xC7S1 12785 37 #define xC7S1 12785
38 38
39 void vp3_dsp_init_c(void) 39 static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
40 { 40 {
41 /* nop */ 41 int16_t *ip = input;
42 } 42 uint8_t *cm = cropTbl + MAX_NEG_CROP;
43 43
44 void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, 44 int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
45 int coeff_count, int16_t *output_data) 45 int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
46 { 46 int t1, t2;
47 int32_t dequantized_data[64];
48 int32_t *ip = dequantized_data;
49 int16_t *op = output_data;
50
51 int32_t A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
52 int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
53 int32_t t1, t2;
54 47
55 int i, j; 48 int i, j;
56 49
57 /* de-zigzag and dequantize */
58 for (i = 0; i < coeff_count; i++) {
59 dequantized_data[i] = dequant_matrix[i] * input_data[i];
60 }
61
62 /* Inverse DCT on the rows now */ 50 /* Inverse DCT on the rows now */
63 for (i = 0; i < 8; i++) { 51 for (i = 0; i < 8; i++) {
64 /* Check for non-zero values */ 52 /* Check for non-zero values */
65 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { 53 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
66 t1 = (int32_t)(xC1S7 * ip[1]); 54 t1 = (int32_t)(xC1S7 * ip[1]);
129 117
130 _Fd = F_ - _Ad; 118 _Fd = F_ - _Ad;
131 _Hd = _Bd + H_; 119 _Hd = _Bd + H_;
132 120
133 /* Final sequence of operations over-write original inputs. */ 121 /* Final sequence of operations over-write original inputs. */
134 ip[0] = (int16_t)((_Gd + _Cd ) >> 0); 122 ip[0] = _Gd + _Cd ;
135 ip[7] = (int16_t)((_Gd - _Cd ) >> 0); 123 ip[7] = _Gd - _Cd ;
136 124
137 ip[1] = (int16_t)((_Add + _Hd ) >> 0); 125 ip[1] = _Add + _Hd;
138 ip[2] = (int16_t)((_Add - _Hd ) >> 0); 126 ip[2] = _Add - _Hd;
139 127
140 ip[3] = (int16_t)((_Ed + _Dd ) >> 0); 128 ip[3] = _Ed + _Dd ;
141 ip[4] = (int16_t)((_Ed - _Dd ) >> 0); 129 ip[4] = _Ed - _Dd ;
142 130
143 ip[5] = (int16_t)((_Fd + _Bdd ) >> 0); 131 ip[5] = _Fd + _Bdd;
144 ip[6] = (int16_t)((_Fd - _Bdd ) >> 0); 132 ip[6] = _Fd - _Bdd;
145 133
146 } 134 }
147 135
148 ip += 8; /* next row */ 136 ip += 8; /* next row */
149 } 137 }
150 138
151 ip = dequantized_data; 139 ip = input;
152 140
153 for ( i = 0; i < 8; i++) { 141 for ( i = 0; i < 8; i++) {
154 /* Check for non-zero values (bitwise or faster than ||) */ 142 /* Check for non-zero values (bitwise or faster than ||) */
155 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | 143 if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
156 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { 144 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
157 145
158 t1 = (int32_t)(xC1S7 * ip[1*8]); 146 t1 = (int32_t)(xC1S7 * ip[1*8]);
159 t2 = (int32_t)(xC7S1 * ip[7*8]); 147 t2 = (int32_t)(xC7S1 * ip[7*8]);
160 t1 >>= 16; 148 t1 >>= 16;
220 _Bdd = _Bd - H_; 208 _Bdd = _Bd - H_;
221 209
222 _Fd = F_ - _Ad; 210 _Fd = F_ - _Ad;
223 _Hd = _Bd + H_; 211 _Hd = _Bd + H_;
224 212
213 if(type==1){ //HACK
214 _Gd += 16*128;
215 _Add+= 16*128;
216 _Ed += 16*128;
217 _Fd += 16*128;
218 }
225 _Gd += IdctAdjustBeforeShift; 219 _Gd += IdctAdjustBeforeShift;
226 _Add += IdctAdjustBeforeShift; 220 _Add += IdctAdjustBeforeShift;
227 _Ed += IdctAdjustBeforeShift; 221 _Ed += IdctAdjustBeforeShift;
228 _Fd += IdctAdjustBeforeShift; 222 _Fd += IdctAdjustBeforeShift;
229 223
230 /* Final sequence of operations over-write original inputs. */ 224 /* Final sequence of operations over-write original inputs. */
231 op[0*8] = (int16_t)((_Gd + _Cd ) >> 4); 225 if(type==0){
232 op[7*8] = (int16_t)((_Gd - _Cd ) >> 4); 226 ip[0*8] = (_Gd + _Cd ) >> 4;
233 227 ip[7*8] = (_Gd - _Cd ) >> 4;
234 op[1*8] = (int16_t)((_Add + _Hd ) >> 4); 228
235 op[2*8] = (int16_t)((_Add - _Hd ) >> 4); 229 ip[1*8] = (_Add + _Hd ) >> 4;
236 230 ip[2*8] = (_Add - _Hd ) >> 4;
237 op[3*8] = (int16_t)((_Ed + _Dd ) >> 4); 231
238 op[4*8] = (int16_t)((_Ed - _Dd ) >> 4); 232 ip[3*8] = (_Ed + _Dd ) >> 4;
239 233 ip[4*8] = (_Ed - _Dd ) >> 4;
240 op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4); 234
241 op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4); 235 ip[5*8] = (_Fd + _Bdd ) >> 4;
236 ip[6*8] = (_Fd - _Bdd ) >> 4;
237 }else if(type==1){
238 dst[0*stride] = cm[(_Gd + _Cd ) >> 4];
239 dst[7*stride] = cm[(_Gd - _Cd ) >> 4];
240
241 dst[1*stride] = cm[(_Add + _Hd ) >> 4];
242 dst[2*stride] = cm[(_Add - _Hd ) >> 4];
243
244 dst[3*stride] = cm[(_Ed + _Dd ) >> 4];
245 dst[4*stride] = cm[(_Ed - _Dd ) >> 4];
246
247 dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
248 dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
249 }else{
250 dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd ) >> 4)];
251 dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd ) >> 4)];
252
253 dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
254 dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
255
256 dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd ) >> 4)];
257 dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd ) >> 4)];
258
259 dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
260 dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
261 }
242 262
243 } else { 263 } else {
244 264 if(type==0){
245 op[0*8] = 0; 265 ip[0*8] =
246 op[7*8] = 0; 266 ip[1*8] =
247 op[1*8] = 0; 267 ip[2*8] =
248 op[2*8] = 0; 268 ip[3*8] =
249 op[3*8] = 0; 269 ip[4*8] =
250 op[4*8] = 0; 270 ip[5*8] =
251 op[5*8] = 0; 271 ip[6*8] =
252 op[6*8] = 0; 272 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
273 }else if(type==1){
274 dst[0*stride]=
275 dst[1*stride]=
276 dst[2*stride]=
277 dst[3*stride]=
278 dst[4*stride]=
279 dst[5*stride]=
280 dst[6*stride]=
281 dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
282 }else{
283 if(ip[0*8]){
284 int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
285 dst[0*stride] = cm[dst[0*stride] + v];
286 dst[1*stride] = cm[dst[1*stride] + v];
287 dst[2*stride] = cm[dst[2*stride] + v];
288 dst[3*stride] = cm[dst[3*stride] + v];
289 dst[4*stride] = cm[dst[4*stride] + v];
290 dst[5*stride] = cm[dst[5*stride] + v];
291 dst[6*stride] = cm[dst[6*stride] + v];
292 dst[7*stride] = cm[dst[7*stride] + v];
293 }
294 }
253 } 295 }
254 296
255 ip++; /* next column */ 297 ip++; /* next column */
256 op++; 298 dst++;
257 } 299 }
258 } 300 }
301
302 void ff_vp3_idct_c(DCTELEM *block/* align 16*/){ /* In-place entry point: forwards block to idct() with type 0. */
303 idct(NULL, 0, block, 0); /* type 0: in the visible idct() code every type==0 store goes through the block pointer (ip[k*8]); dst is passed as NULL and is not dereferenced on that path. */
304 }
305
306 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ /* "Put" entry point: forwards to idct() with type 1, using dest as dst and line_size as stride. */
307 idct(dest, line_size, block, 1); /* type 1: idct() adds 16*128 before the >>4 and overwrites dst[k*stride] with cm[...] lookups. NOTE(review): the DC-only type==1 path stores 128 + value without the cm[] lookup -- confirm the value cannot leave the uint8_t range. */
308 }
309
310 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ /* "Add" entry point: forwards to idct() with type 2, using dest as dst and line_size as stride. */
311 idct(dest, line_size, block, 2); /* type 2 (the final else branch in idct()): each result is added to the existing dst[k*stride] value and stored back through the cm[] lookup; a column whose rows 1..7 and row 0 are all zero leaves dst untouched. */
312 }