Mercurial > libavcodec.hg
comparison i386/h264dsp_mmx.c @ 8375:de2509cf3c44 libavcodec
H.264 idct functions that include the chroma, inter luma and intra16 luma loops,
thus avoiding the calling overhead.
New functions are not yet used.
author | michael |
---|---|
date | Thu, 18 Dec 2008 02:36:48 +0000 |
parents | eebc7209c47f |
children |
comparison
equal
deleted
inserted
replaced
8374:9000fd7c166e | 8375:de2509cf3c44 |
---|---|
374 "+m"(*(uint64_t*)(dst+3*stride)) | 374 "+m"(*(uint64_t*)(dst+3*stride)) |
375 ); | 375 ); |
376 } | 376 } |
377 } | 377 } |
378 | 378 |
379 //FIXME this table is a duplicate from h264data.h, and will be removed once the tables from h264 have been split | |
380 static const uint8_t scan8[16 + 2*4]={ | |
381 4+1*8, 5+1*8, 4+2*8, 5+2*8, | |
382 6+1*8, 7+1*8, 6+2*8, 7+2*8, | |
383 4+3*8, 5+3*8, 4+4*8, 5+4*8, | |
384 6+3*8, 7+3*8, 6+4*8, 7+4*8, | |
385 1+1*8, 2+1*8, | |
386 1+2*8, 2+2*8, | |
387 1+4*8, 2+4*8, | |
388 1+5*8, 2+5*8, | |
389 }; | |
390 | |
391 static void ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
392 int i; | |
393 for(i=0; i<16; i++){ | |
394 if(nnzc[ scan8[i] ]) | |
395 ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride); | |
396 } | |
397 } | |
398 | |
399 static void ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
400 int i; | |
401 for(i=0; i<16; i+=4){ | |
402 if(nnzc[ scan8[i] ]) | |
403 ff_h264_idct8_add_mmx(dst + block_offset[i], block + i*16, stride); | |
404 } | |
405 } | |
406 | |
407 | |
408 static void ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
409 int i; | |
410 for(i=0; i<16; i++){ | |
411 int nnz = nnzc[ scan8[i] ]; | |
412 if(nnz){ | |
413 if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); | |
414 else ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride); | |
415 } | |
416 } | |
417 } | |
418 | |
419 static void ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
420 int i; | |
421 for(i=0; i<16; i++){ | |
422 if(nnzc[ scan8[i] ] || block[i*16]) | |
423 ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride); | |
424 } | |
425 } | |
426 | |
427 static void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
428 int i; | |
429 for(i=0; i<16; i++){ | |
430 if(nnzc[ scan8[i] ]) ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride); | |
431 else if(block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); | |
432 } | |
433 } | |
434 | |
435 static void ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
436 int i; | |
437 for(i=0; i<16; i+=4){ | |
438 int nnz = nnzc[ scan8[i] ]; | |
439 if(nnz){ | |
440 if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); | |
441 else ff_h264_idct8_add_mmx (dst + block_offset[i], block + i*16, stride); | |
442 } | |
443 } | |
444 } | |
445 | |
446 static void ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
447 int i; | |
448 for(i=0; i<16; i+=4){ | |
449 int nnz = nnzc[ scan8[i] ]; | |
450 if(nnz){ | |
451 if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); | |
452 else ff_h264_idct8_add_sse2 (dst + block_offset[i], block + i*16, stride); | |
453 } | |
454 } | |
455 } | |
456 | |
457 static void ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
458 int i; | |
459 for(i=16; i<16+8; i++){ | |
460 if(nnzc[ scan8[i] ] || block[i*16]) | |
461 ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride); | |
462 } | |
463 } | |
464 | |
465 static void ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ | |
466 int i; | |
467 for(i=16; i<16+8; i++){ | |
468 if(nnzc[ scan8[i] ]) | |
469 ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride); | |
470 else if(block[i*16]) | |
471 ff_h264_idct_dc_add_mmx2(dest[(i&4)>>2] + block_offset[i], block + i*16, stride); | |
472 } | |
473 } | |
379 | 474 |
380 /***********************************/ | 475 /***********************************/ |
381 /* deblocking */ | 476 /* deblocking */ |
382 | 477 |
383 // out: o = |x-y|>a | 478 // out: o = |x-y|>a |