Mercurial > libavcodec.hg
comparison vc1dsp.c @ 6157:953c8efd5298 libavcodec
Factor out constant addition
author | kostya |
---|---|
date | Sat, 19 Jan 2008 06:52:29 +0000 |
parents | 90de28dfd8d6 |
children | 2f43560f5dba |
comparison legend:
- equal
- deleted
- inserted
- replaced
6156:b5702c981fe2 | 6157:953c8efd5298 |
---|---|
88 DCTELEM *src, *dst; | 88 DCTELEM *src, *dst; |
89 | 89 |
90 src = block; | 90 src = block; |
91 dst = block; | 91 dst = block; |
92 for(i = 0; i < 8; i++){ | 92 for(i = 0; i < 8; i++){ |
93 t1 = 12 * (src[0] + src[4]); | 93 t1 = 12 * (src[0] + src[4]) + 4; |
94 t2 = 12 * (src[0] - src[4]); | 94 t2 = 12 * (src[0] - src[4]) + 4; |
95 t3 = 16 * src[2] + 6 * src[6]; | 95 t3 = 16 * src[2] + 6 * src[6]; |
96 t4 = 6 * src[2] - 16 * src[6]; | 96 t4 = 6 * src[2] - 16 * src[6]; |
97 | 97 |
98 t5 = t1 + t3; | 98 t5 = t1 + t3; |
99 t6 = t2 + t4; | 99 t6 = t2 + t4; |
103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | 103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; |
104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | 104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; |
105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | 105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; |
106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | 106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; |
107 | 107 |
108 dst[0] = (t5 + t1 + 4) >> 3; | 108 dst[0] = (t5 + t1) >> 3; |
109 dst[1] = (t6 + t2 + 4) >> 3; | 109 dst[1] = (t6 + t2) >> 3; |
110 dst[2] = (t7 + t3 + 4) >> 3; | 110 dst[2] = (t7 + t3) >> 3; |
111 dst[3] = (t8 + t4 + 4) >> 3; | 111 dst[3] = (t8 + t4) >> 3; |
112 dst[4] = (t8 - t4 + 4) >> 3; | 112 dst[4] = (t8 - t4) >> 3; |
113 dst[5] = (t7 - t3 + 4) >> 3; | 113 dst[5] = (t7 - t3) >> 3; |
114 dst[6] = (t6 - t2 + 4) >> 3; | 114 dst[6] = (t6 - t2) >> 3; |
115 dst[7] = (t5 - t1 + 4) >> 3; | 115 dst[7] = (t5 - t1) >> 3; |
116 | 116 |
117 src += 8; | 117 src += 8; |
118 dst += 8; | 118 dst += 8; |
119 } | 119 } |
120 | 120 |
121 src = block; | 121 src = block; |
122 dst = block; | 122 dst = block; |
123 for(i = 0; i < 8; i++){ | 123 for(i = 0; i < 8; i++){ |
124 t1 = 12 * (src[ 0] + src[32]); | 124 t1 = 12 * (src[ 0] + src[32]) + 64; |
125 t2 = 12 * (src[ 0] - src[32]); | 125 t2 = 12 * (src[ 0] - src[32]) + 64; |
126 t3 = 16 * src[16] + 6 * src[48]; | 126 t3 = 16 * src[16] + 6 * src[48]; |
127 t4 = 6 * src[16] - 16 * src[48]; | 127 t4 = 6 * src[16] - 16 * src[48]; |
128 | 128 |
129 t5 = t1 + t3; | 129 t5 = t1 + t3; |
130 t6 = t2 + t4; | 130 t6 = t2 + t4; |
134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | 134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; |
135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | 135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; |
136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | 136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; |
137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | 137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; |
138 | 138 |
139 dst[ 0] = (t5 + t1 + 64) >> 7; | 139 dst[ 0] = (t5 + t1) >> 7; |
140 dst[ 8] = (t6 + t2 + 64) >> 7; | 140 dst[ 8] = (t6 + t2) >> 7; |
141 dst[16] = (t7 + t3 + 64) >> 7; | 141 dst[16] = (t7 + t3) >> 7; |
142 dst[24] = (t8 + t4 + 64) >> 7; | 142 dst[24] = (t8 + t4) >> 7; |
143 dst[32] = (t8 - t4 + 64 + 1) >> 7; | 143 dst[32] = (t8 - t4 + 1) >> 7; |
144 dst[40] = (t7 - t3 + 64 + 1) >> 7; | 144 dst[40] = (t7 - t3 + 1) >> 7; |
145 dst[48] = (t6 - t2 + 64 + 1) >> 7; | 145 dst[48] = (t6 - t2 + 1) >> 7; |
146 dst[56] = (t5 - t1 + 64 + 1) >> 7; | 146 dst[56] = (t5 - t1 + 1) >> 7; |
147 | 147 |
148 src++; | 148 src++; |
149 dst++; | 149 dst++; |
150 } | 150 } |
151 } | 151 } |
160 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 160 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
161 | 161 |
162 src = block; | 162 src = block; |
163 dst = block; | 163 dst = block; |
164 for(i = 0; i < 4; i++){ | 164 for(i = 0; i < 4; i++){ |
165 t1 = 12 * (src[0] + src[4]); | 165 t1 = 12 * (src[0] + src[4]) + 4; |
166 t2 = 12 * (src[0] - src[4]); | 166 t2 = 12 * (src[0] - src[4]) + 4; |
167 t3 = 16 * src[2] + 6 * src[6]; | 167 t3 = 16 * src[2] + 6 * src[6]; |
168 t4 = 6 * src[2] - 16 * src[6]; | 168 t4 = 6 * src[2] - 16 * src[6]; |
169 | 169 |
170 t5 = t1 + t3; | 170 t5 = t1 + t3; |
171 t6 = t2 + t4; | 171 t6 = t2 + t4; |
175 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; | 175 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; |
176 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; | 176 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; |
177 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; | 177 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; |
178 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; | 178 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; |
179 | 179 |
180 dst[0] = (t5 + t1 + 4) >> 3; | 180 dst[0] = (t5 + t1) >> 3; |
181 dst[1] = (t6 + t2 + 4) >> 3; | 181 dst[1] = (t6 + t2) >> 3; |
182 dst[2] = (t7 + t3 + 4) >> 3; | 182 dst[2] = (t7 + t3) >> 3; |
183 dst[3] = (t8 + t4 + 4) >> 3; | 183 dst[3] = (t8 + t4) >> 3; |
184 dst[4] = (t8 - t4 + 4) >> 3; | 184 dst[4] = (t8 - t4) >> 3; |
185 dst[5] = (t7 - t3 + 4) >> 3; | 185 dst[5] = (t7 - t3) >> 3; |
186 dst[6] = (t6 - t2 + 4) >> 3; | 186 dst[6] = (t6 - t2) >> 3; |
187 dst[7] = (t5 - t1 + 4) >> 3; | 187 dst[7] = (t5 - t1) >> 3; |
188 | 188 |
189 src += 8; | 189 src += 8; |
190 dst += 8; | 190 dst += 8; |
191 } | 191 } |
192 | 192 |
193 src = block; | 193 src = block; |
194 for(i = 0; i < 8; i++){ | 194 for(i = 0; i < 8; i++){ |
195 t1 = 17 * (src[ 0] + src[16]); | 195 t1 = 17 * (src[ 0] + src[16]) + 64; |
196 t2 = 17 * (src[ 0] - src[16]); | 196 t2 = 17 * (src[ 0] - src[16]) + 64; |
197 t3 = 22 * src[ 8]; | 197 t3 = 22 * src[ 8]; |
198 t4 = 22 * src[24]; | 198 t4 = 22 * src[24]; |
199 t5 = 10 * src[ 8]; | 199 t5 = 10 * src[ 8]; |
200 t6 = 10 * src[24]; | 200 t6 = 10 * src[24]; |
201 | 201 |
202 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)]; | 202 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6) >> 7)]; |
203 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)]; | 203 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5) >> 7)]; |
204 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)]; | 204 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5) >> 7)]; |
205 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)]; | 205 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6) >> 7)]; |
206 | 206 |
207 src ++; | 207 src ++; |
208 dest++; | 208 dest++; |
209 } | 209 } |
210 } | 210 } |
219 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 219 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
220 | 220 |
221 src = block; | 221 src = block; |
222 dst = block; | 222 dst = block; |
223 for(i = 0; i < 8; i++){ | 223 for(i = 0; i < 8; i++){ |
224 t1 = 17 * (src[0] + src[2]); | 224 t1 = 17 * (src[0] + src[2]) + 4; |
225 t2 = 17 * (src[0] - src[2]); | 225 t2 = 17 * (src[0] - src[2]) + 4; |
226 t3 = 22 * src[1]; | 226 t3 = 22 * src[1]; |
227 t4 = 22 * src[3]; | 227 t4 = 22 * src[3]; |
228 t5 = 10 * src[1]; | 228 t5 = 10 * src[1]; |
229 t6 = 10 * src[3]; | 229 t6 = 10 * src[3]; |
230 | 230 |
231 dst[0] = (t1 + t3 + t6 + 4) >> 3; | 231 dst[0] = (t1 + t3 + t6) >> 3; |
232 dst[1] = (t2 - t4 + t5 + 4) >> 3; | 232 dst[1] = (t2 - t4 + t5) >> 3; |
233 dst[2] = (t2 + t4 - t5 + 4) >> 3; | 233 dst[2] = (t2 + t4 - t5) >> 3; |
234 dst[3] = (t1 - t3 - t6 + 4) >> 3; | 234 dst[3] = (t1 - t3 - t6) >> 3; |
235 | 235 |
236 src += 8; | 236 src += 8; |
237 dst += 8; | 237 dst += 8; |
238 } | 238 } |
239 | 239 |
240 src = block; | 240 src = block; |
241 for(i = 0; i < 4; i++){ | 241 for(i = 0; i < 4; i++){ |
242 t1 = 12 * (src[ 0] + src[32]); | 242 t1 = 12 * (src[ 0] + src[32]) + 64; |
243 t2 = 12 * (src[ 0] - src[32]); | 243 t2 = 12 * (src[ 0] - src[32]) + 64; |
244 t3 = 16 * src[16] + 6 * src[48]; | 244 t3 = 16 * src[16] + 6 * src[48]; |
245 t4 = 6 * src[16] - 16 * src[48]; | 245 t4 = 6 * src[16] - 16 * src[48]; |
246 | 246 |
247 t5 = t1 + t3; | 247 t5 = t1 + t3; |
248 t6 = t2 + t4; | 248 t6 = t2 + t4; |
252 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; | 252 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; |
253 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; | 253 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; |
254 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | 254 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; |
255 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | 255 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; |
256 | 256 |
257 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1 + 64) >> 7)]; | 257 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)]; |
258 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2 + 64) >> 7)]; | 258 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)]; |
259 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3 + 64) >> 7)]; | 259 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)]; |
260 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4 + 64) >> 7)]; | 260 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)]; |
261 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 64 + 1) >> 7)]; | 261 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)]; |
262 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 64 + 1) >> 7)]; | 262 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)]; |
263 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 64 + 1) >> 7)]; | 263 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)]; |
264 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 64 + 1) >> 7)]; | 264 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)]; |
265 | 265 |
266 src ++; | 266 src ++; |
267 dest++; | 267 dest++; |
268 } | 268 } |
269 } | 269 } |
278 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 278 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
279 | 279 |
280 src = block; | 280 src = block; |
281 dst = block; | 281 dst = block; |
282 for(i = 0; i < 4; i++){ | 282 for(i = 0; i < 4; i++){ |
283 t1 = 17 * (src[0] + src[2]); | 283 t1 = 17 * (src[0] + src[2]) + 4; |
284 t2 = 17 * (src[0] - src[2]); | 284 t2 = 17 * (src[0] - src[2]) + 4; |
285 t3 = 22 * src[1]; | 285 t3 = 22 * src[1]; |
286 t4 = 22 * src[3]; | 286 t4 = 22 * src[3]; |
287 t5 = 10 * src[1]; | 287 t5 = 10 * src[1]; |
288 t6 = 10 * src[3]; | 288 t6 = 10 * src[3]; |
289 | 289 |
290 dst[0] = (t1 + t3 + t6 + 4) >> 3; | 290 dst[0] = (t1 + t3 + t6) >> 3; |
291 dst[1] = (t2 - t4 + t5 + 4) >> 3; | 291 dst[1] = (t2 - t4 + t5) >> 3; |
292 dst[2] = (t2 + t4 - t5 + 4) >> 3; | 292 dst[2] = (t2 + t4 - t5) >> 3; |
293 dst[3] = (t1 - t3 - t6 + 4) >> 3; | 293 dst[3] = (t1 - t3 - t6) >> 3; |
294 | 294 |
295 src += 8; | 295 src += 8; |
296 dst += 8; | 296 dst += 8; |
297 } | 297 } |
298 | 298 |
299 src = block; | 299 src = block; |
300 for(i = 0; i < 4; i++){ | 300 for(i = 0; i < 4; i++){ |
301 t1 = 17 * (src[ 0] + src[16]); | 301 t1 = 17 * (src[ 0] + src[16]) + 64; |
302 t2 = 17 * (src[ 0] - src[16]); | 302 t2 = 17 * (src[ 0] - src[16]) + 64; |
303 t3 = 22 * src[ 8]; | 303 t3 = 22 * src[ 8]; |
304 t4 = 22 * src[24]; | 304 t4 = 22 * src[24]; |
305 t5 = 10 * src[ 8]; | 305 t5 = 10 * src[ 8]; |
306 t6 = 10 * src[24]; | 306 t6 = 10 * src[24]; |
307 | 307 |
308 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)]; | 308 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6) >> 7)]; |
309 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)]; | 309 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5) >> 7)]; |
310 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)]; | 310 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5) >> 7)]; |
311 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)]; | 311 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6) >> 7)]; |
312 | 312 |
313 src ++; | 313 src ++; |
314 dest++; | 314 dest++; |
315 } | 315 } |
316 } | 316 } |