comparison vc1dsp.c @ 6157:953c8efd5298 libavcodec

Factor out constant addition
author kostya
date Sat, 19 Jan 2008 06:52:29 +0000
parents 90de28dfd8d6
children 2f43560f5dba
comparison
equal deleted inserted replaced
6156:b5702c981fe2 6157:953c8efd5298
88 DCTELEM *src, *dst; 88 DCTELEM *src, *dst;
89 89
90 src = block; 90 src = block;
91 dst = block; 91 dst = block;
92 for(i = 0; i < 8; i++){ 92 for(i = 0; i < 8; i++){
93 t1 = 12 * (src[0] + src[4]); 93 t1 = 12 * (src[0] + src[4]) + 4;
94 t2 = 12 * (src[0] - src[4]); 94 t2 = 12 * (src[0] - src[4]) + 4;
95 t3 = 16 * src[2] + 6 * src[6]; 95 t3 = 16 * src[2] + 6 * src[6];
96 t4 = 6 * src[2] - 16 * src[6]; 96 t4 = 6 * src[2] - 16 * src[6];
97 97
98 t5 = t1 + t3; 98 t5 = t1 + t3;
99 t6 = t2 + t4; 99 t6 = t2 + t4;
103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; 103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; 104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; 105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; 106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
107 107
108 dst[0] = (t5 + t1 + 4) >> 3; 108 dst[0] = (t5 + t1) >> 3;
109 dst[1] = (t6 + t2 + 4) >> 3; 109 dst[1] = (t6 + t2) >> 3;
110 dst[2] = (t7 + t3 + 4) >> 3; 110 dst[2] = (t7 + t3) >> 3;
111 dst[3] = (t8 + t4 + 4) >> 3; 111 dst[3] = (t8 + t4) >> 3;
112 dst[4] = (t8 - t4 + 4) >> 3; 112 dst[4] = (t8 - t4) >> 3;
113 dst[5] = (t7 - t3 + 4) >> 3; 113 dst[5] = (t7 - t3) >> 3;
114 dst[6] = (t6 - t2 + 4) >> 3; 114 dst[6] = (t6 - t2) >> 3;
115 dst[7] = (t5 - t1 + 4) >> 3; 115 dst[7] = (t5 - t1) >> 3;
116 116
117 src += 8; 117 src += 8;
118 dst += 8; 118 dst += 8;
119 } 119 }
120 120
121 src = block; 121 src = block;
122 dst = block; 122 dst = block;
123 for(i = 0; i < 8; i++){ 123 for(i = 0; i < 8; i++){
124 t1 = 12 * (src[ 0] + src[32]); 124 t1 = 12 * (src[ 0] + src[32]) + 64;
125 t2 = 12 * (src[ 0] - src[32]); 125 t2 = 12 * (src[ 0] - src[32]) + 64;
126 t3 = 16 * src[16] + 6 * src[48]; 126 t3 = 16 * src[16] + 6 * src[48];
127 t4 = 6 * src[16] - 16 * src[48]; 127 t4 = 6 * src[16] - 16 * src[48];
128 128
129 t5 = t1 + t3; 129 t5 = t1 + t3;
130 t6 = t2 + t4; 130 t6 = t2 + t4;
134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; 134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; 135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; 136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; 137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
138 138
139 dst[ 0] = (t5 + t1 + 64) >> 7; 139 dst[ 0] = (t5 + t1) >> 7;
140 dst[ 8] = (t6 + t2 + 64) >> 7; 140 dst[ 8] = (t6 + t2) >> 7;
141 dst[16] = (t7 + t3 + 64) >> 7; 141 dst[16] = (t7 + t3) >> 7;
142 dst[24] = (t8 + t4 + 64) >> 7; 142 dst[24] = (t8 + t4) >> 7;
143 dst[32] = (t8 - t4 + 64 + 1) >> 7; 143 dst[32] = (t8 - t4 + 1) >> 7;
144 dst[40] = (t7 - t3 + 64 + 1) >> 7; 144 dst[40] = (t7 - t3 + 1) >> 7;
145 dst[48] = (t6 - t2 + 64 + 1) >> 7; 145 dst[48] = (t6 - t2 + 1) >> 7;
146 dst[56] = (t5 - t1 + 64 + 1) >> 7; 146 dst[56] = (t5 - t1 + 1) >> 7;
147 147
148 src++; 148 src++;
149 dst++; 149 dst++;
150 } 150 }
151 } 151 }
160 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 160 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
161 161
162 src = block; 162 src = block;
163 dst = block; 163 dst = block;
164 for(i = 0; i < 4; i++){ 164 for(i = 0; i < 4; i++){
165 t1 = 12 * (src[0] + src[4]); 165 t1 = 12 * (src[0] + src[4]) + 4;
166 t2 = 12 * (src[0] - src[4]); 166 t2 = 12 * (src[0] - src[4]) + 4;
167 t3 = 16 * src[2] + 6 * src[6]; 167 t3 = 16 * src[2] + 6 * src[6];
168 t4 = 6 * src[2] - 16 * src[6]; 168 t4 = 6 * src[2] - 16 * src[6];
169 169
170 t5 = t1 + t3; 170 t5 = t1 + t3;
171 t6 = t2 + t4; 171 t6 = t2 + t4;
175 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; 175 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
176 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; 176 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
177 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; 177 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
178 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; 178 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
179 179
180 dst[0] = (t5 + t1 + 4) >> 3; 180 dst[0] = (t5 + t1) >> 3;
181 dst[1] = (t6 + t2 + 4) >> 3; 181 dst[1] = (t6 + t2) >> 3;
182 dst[2] = (t7 + t3 + 4) >> 3; 182 dst[2] = (t7 + t3) >> 3;
183 dst[3] = (t8 + t4 + 4) >> 3; 183 dst[3] = (t8 + t4) >> 3;
184 dst[4] = (t8 - t4 + 4) >> 3; 184 dst[4] = (t8 - t4) >> 3;
185 dst[5] = (t7 - t3 + 4) >> 3; 185 dst[5] = (t7 - t3) >> 3;
186 dst[6] = (t6 - t2 + 4) >> 3; 186 dst[6] = (t6 - t2) >> 3;
187 dst[7] = (t5 - t1 + 4) >> 3; 187 dst[7] = (t5 - t1) >> 3;
188 188
189 src += 8; 189 src += 8;
190 dst += 8; 190 dst += 8;
191 } 191 }
192 192
193 src = block; 193 src = block;
194 for(i = 0; i < 8; i++){ 194 for(i = 0; i < 8; i++){
195 t1 = 17 * (src[ 0] + src[16]); 195 t1 = 17 * (src[ 0] + src[16]) + 64;
196 t2 = 17 * (src[ 0] - src[16]); 196 t2 = 17 * (src[ 0] - src[16]) + 64;
197 t3 = 22 * src[ 8]; 197 t3 = 22 * src[ 8];
198 t4 = 22 * src[24]; 198 t4 = 22 * src[24];
199 t5 = 10 * src[ 8]; 199 t5 = 10 * src[ 8];
200 t6 = 10 * src[24]; 200 t6 = 10 * src[24];
201 201
202 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)]; 202 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6) >> 7)];
203 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)]; 203 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5) >> 7)];
204 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)]; 204 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5) >> 7)];
205 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)]; 205 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6) >> 7)];
206 206
207 src ++; 207 src ++;
208 dest++; 208 dest++;
209 } 209 }
210 } 210 }
219 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 219 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
220 220
221 src = block; 221 src = block;
222 dst = block; 222 dst = block;
223 for(i = 0; i < 8; i++){ 223 for(i = 0; i < 8; i++){
224 t1 = 17 * (src[0] + src[2]); 224 t1 = 17 * (src[0] + src[2]) + 4;
225 t2 = 17 * (src[0] - src[2]); 225 t2 = 17 * (src[0] - src[2]) + 4;
226 t3 = 22 * src[1]; 226 t3 = 22 * src[1];
227 t4 = 22 * src[3]; 227 t4 = 22 * src[3];
228 t5 = 10 * src[1]; 228 t5 = 10 * src[1];
229 t6 = 10 * src[3]; 229 t6 = 10 * src[3];
230 230
231 dst[0] = (t1 + t3 + t6 + 4) >> 3; 231 dst[0] = (t1 + t3 + t6) >> 3;
232 dst[1] = (t2 - t4 + t5 + 4) >> 3; 232 dst[1] = (t2 - t4 + t5) >> 3;
233 dst[2] = (t2 + t4 - t5 + 4) >> 3; 233 dst[2] = (t2 + t4 - t5) >> 3;
234 dst[3] = (t1 - t3 - t6 + 4) >> 3; 234 dst[3] = (t1 - t3 - t6) >> 3;
235 235
236 src += 8; 236 src += 8;
237 dst += 8; 237 dst += 8;
238 } 238 }
239 239
240 src = block; 240 src = block;
241 for(i = 0; i < 4; i++){ 241 for(i = 0; i < 4; i++){
242 t1 = 12 * (src[ 0] + src[32]); 242 t1 = 12 * (src[ 0] + src[32]) + 64;
243 t2 = 12 * (src[ 0] - src[32]); 243 t2 = 12 * (src[ 0] - src[32]) + 64;
244 t3 = 16 * src[16] + 6 * src[48]; 244 t3 = 16 * src[16] + 6 * src[48];
245 t4 = 6 * src[16] - 16 * src[48]; 245 t4 = 6 * src[16] - 16 * src[48];
246 246
247 t5 = t1 + t3; 247 t5 = t1 + t3;
248 t6 = t2 + t4; 248 t6 = t2 + t4;
252 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; 252 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
253 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; 253 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
254 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; 254 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
255 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; 255 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
256 256
257 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1 + 64) >> 7)]; 257 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)];
258 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2 + 64) >> 7)]; 258 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)];
259 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3 + 64) >> 7)]; 259 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)];
260 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4 + 64) >> 7)]; 260 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)];
261 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 64 + 1) >> 7)]; 261 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)];
262 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 64 + 1) >> 7)]; 262 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)];
263 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 64 + 1) >> 7)]; 263 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)];
264 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 64 + 1) >> 7)]; 264 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)];
265 265
266 src ++; 266 src ++;
267 dest++; 267 dest++;
268 } 268 }
269 } 269 }
278 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 278 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
279 279
280 src = block; 280 src = block;
281 dst = block; 281 dst = block;
282 for(i = 0; i < 4; i++){ 282 for(i = 0; i < 4; i++){
283 t1 = 17 * (src[0] + src[2]); 283 t1 = 17 * (src[0] + src[2]) + 4;
284 t2 = 17 * (src[0] - src[2]); 284 t2 = 17 * (src[0] - src[2]) + 4;
285 t3 = 22 * src[1]; 285 t3 = 22 * src[1];
286 t4 = 22 * src[3]; 286 t4 = 22 * src[3];
287 t5 = 10 * src[1]; 287 t5 = 10 * src[1];
288 t6 = 10 * src[3]; 288 t6 = 10 * src[3];
289 289
290 dst[0] = (t1 + t3 + t6 + 4) >> 3; 290 dst[0] = (t1 + t3 + t6) >> 3;
291 dst[1] = (t2 - t4 + t5 + 4) >> 3; 291 dst[1] = (t2 - t4 + t5) >> 3;
292 dst[2] = (t2 + t4 - t5 + 4) >> 3; 292 dst[2] = (t2 + t4 - t5) >> 3;
293 dst[3] = (t1 - t3 - t6 + 4) >> 3; 293 dst[3] = (t1 - t3 - t6) >> 3;
294 294
295 src += 8; 295 src += 8;
296 dst += 8; 296 dst += 8;
297 } 297 }
298 298
299 src = block; 299 src = block;
300 for(i = 0; i < 4; i++){ 300 for(i = 0; i < 4; i++){
301 t1 = 17 * (src[ 0] + src[16]); 301 t1 = 17 * (src[ 0] + src[16]) + 64;
302 t2 = 17 * (src[ 0] - src[16]); 302 t2 = 17 * (src[ 0] - src[16]) + 64;
303 t3 = 22 * src[ 8]; 303 t3 = 22 * src[ 8];
304 t4 = 22 * src[24]; 304 t4 = 22 * src[24];
305 t5 = 10 * src[ 8]; 305 t5 = 10 * src[ 8];
306 t6 = 10 * src[24]; 306 t6 = 10 * src[24];
307 307
308 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)]; 308 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6) >> 7)];
309 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)]; 309 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5) >> 7)];
310 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)]; 310 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5) >> 7)];
311 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)]; 311 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6) >> 7)];
312 312
313 src ++; 313 src ++;
314 dest++; 314 dest++;
315 } 315 }
316 } 316 }