comparison snow.c @ 3206:c1add9fe5c65 libavcodec

Snow mmx + sse2 part 2 Patch by Robert Edele, yartrebo <<at>> earthlink <<dot>> net
author corey
date Mon, 20 Mar 2006 05:52:23 +0000
parents 6b9f0c4fbdbe
children 68721b62a528
comparison
equal deleted inserted replaced
3205:198f8b01424d 3206:c1add9fe5c65
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
229 //error:0.000033 229 //error:0.000033
230 }; 230 };
231 #elif 1 // 64*linear 231 #elif 1 // 64*linear
232 static const uint8_t obmc32[1024]={ 232 static const uint8_t obmc32[1024]={
233 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, 234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, 235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, 236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, 237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, 238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, 239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, 240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, 241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, 242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, 243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, 244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, 245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, 246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, 247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, 248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, 249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, 250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, 251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, 252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, 253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, 254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, 255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, 256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, 257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, 258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, 259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, 260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, 261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, 262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, 263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
265 //error:0.000020 265 //error:0.000020
266 }; 266 };
267 static const uint8_t obmc16[256]={ 267 static const uint8_t obmc16[256]={
268 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, 268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, 269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, 270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, 271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, 272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, 273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, 274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, 275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, 276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, 277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, 278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, 279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, 280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, 281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, 282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, 283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
284 //error:0.000015 284 //error:0.000015
285 }; 285 };
286 #else //64*cos 286 #else //64*cos
287 static const uint8_t obmc32[1024]={ 287 static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340 }; 340 };
341 #endif 341 #endif
342 342
343 //linear *64 343 //linear *64
344 static const uint8_t obmc8[64]={ 344 static const uint8_t obmc8[64]={
345 1, 3, 5, 7, 7, 5, 3, 1, 345 4, 12, 20, 28, 28, 20, 12, 4,
346 3, 9,15,21,21,15, 9, 3, 346 12, 36, 60, 84, 84, 60, 36, 12,
347 5,15,25,35,35,25,15, 5, 347 20, 60,100,140,140,100, 60, 20,
348 7,21,35,49,49,35,21, 7, 348 28, 84,140,196,196,140, 84, 28,
349 7,21,35,49,49,35,21, 7, 349 28, 84,140,196,196,140, 84, 28,
350 5,15,25,35,35,25,15, 5, 350 20, 60,100,140,140,100, 60, 20,
351 3, 9,15,21,21,15, 9, 3, 351 12, 36, 60, 84, 84, 60, 36, 12,
352 1, 3, 5, 7, 7, 5, 3, 1, 352 4, 12, 20, 28, 28, 20, 12, 4,
353 //error:0.000000 353 //error:0.000000
354 }; 354 };
355 355
356 //linear *64 356 //linear *64
357 static const uint8_t obmc4[16]={ 357 static const uint8_t obmc4[16]={
358 4,12,12, 4, 358 16, 48, 48, 16,
359 12,36,36,12, 359 48,144,144, 48,
360 12,36,36,12, 360 48,144,144, 48,
361 4,12,12, 4, 361 16, 48, 48, 16,
362 //error:0.000000 362 //error:0.000000
363 }; 363 };
364 364
365 static const uint8_t *obmc_tab[4]={ 365 static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4 366 obmc32, obmc16, obmc8, obmc4
2960 } 2960 }
2961 } 2961 }
2962 } 2962 }
2963 *b= backup; 2963 *b= backup;
2964 2964
2965 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping 2965 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
2966 } 2966 }
2967 2967
2968 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ 2968 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2969 const int b_stride = s->b_width << s->block_max_depth; 2969 const int b_stride = s->b_width << s->block_max_depth;
2970 const int b_height = s->b_height<< s->block_max_depth; 2970 const int b_height = s->b_height<< s->block_max_depth;
3020 int distortion; 3020 int distortion;
3021 int rate= 0; 3021 int rate= 0;
3022 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); 3022 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3023 int sx= block_w*mb_x - block_w/2; 3023 int sx= block_w*mb_x - block_w/2;
3024 int sy= block_w*mb_y - block_w/2; 3024 int sy= block_w*mb_y - block_w/2;
3025 const int x0= FFMAX(0,-sx); 3025 int x0= FFMAX(0,-sx);
3026 const int y0= FFMAX(0,-sy); 3026 int y0= FFMAX(0,-sy);
3027 const int x1= FFMIN(block_w*2, w-sx); 3027 int x1= FFMIN(block_w*2, w-sx);
3028 const int y1= FFMIN(block_w*2, h-sy); 3028 int y1= FFMIN(block_w*2, h-sy);
3029 int i,x,y; 3029 int i,x,y;
3030 3030
3031 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); 3031 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3032 3032
3033 for(y=y0; y<y1; y++){ 3033 for(y=y0; y<y1; y++){
3039 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); 3039 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3040 v = (v + pred1[x]) >> FRAC_BITS; 3040 v = (v + pred1[x]) >> FRAC_BITS;
3041 if(v&(~255)) v= ~(v>>31); 3041 if(v&(~255)) v= ~(v>>31);
3042 dst1[x] = v; 3042 dst1[x] = v;
3043 } 3043 }
3044 }
3045
3046 /* copy the regions where obmc[] = (uint8_t)256 */
3047 if(LOG2_OBMC_MAX == 8
3048 && (mb_x == 0 || mb_x == b_stride-1)
3049 && (mb_y == 0 || mb_y == b_height-1)){
3050 if(mb_x == 0)
3051 x1 = block_w;
3052 else
3053 x0 = block_w;
3054 if(mb_y == 0)
3055 y1 = block_w;
3056 else
3057 y0 = block_w;
3058 for(y=y0; y<y1; y++)
3059 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3044 } 3060 }
3045 3061
3046 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block 3062 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
3047 if(block_w==16){ 3063 if(block_w==16){
3048 distortion = 0; 3064 distortion = 0;